Update LLVM to r98631.

author: rdivacky <rdivacky@FreeBSD.org> 2010-03-16 16:51:38 +0000
committer: rdivacky <rdivacky@FreeBSD.org> 2010-03-16 16:51:38 +0000
commit: 0f448b841684305c051796982f300c9bff959307 (patch)
tree: 458dd25677a43aef6390ecadb4423817f00e08b0 /lib
parent: 9e2446b38c94db61b2416c28fee415c03663c11c (diff)
download: FreeBSD-src-0f448b841684305c051796982f300c9bff959307.zip
FreeBSD-src-0f448b841684305c051796982f300c9bff959307.tar.gz
194 files changed, 5359 insertions, 3736 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 114db2d..96bb027 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -589,6 +589,30 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
   APInt Offset = APInt(BitWidth,
                        TD->getIndexedOffset(Ptr->getType(),
                                             (Value**)Ops+1, NumOps-1));
+  Ptr = cast<Constant>(Ptr->stripPointerCasts());
+
+  // If this is a GEP of a GEP, fold it all into a single GEP.
+  while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
+    SmallVector<Value *, 4> NestedOps(GEP->op_begin()+1, GEP->op_end());
+
+    // Do not try to incorporate the sub-GEP if some index is not a number.
+    bool AllConstantInt = true;
+    for (unsigned i = 0, e = NestedOps.size(); i != e; ++i)
+      if (!isa<ConstantInt>(NestedOps[i])) {
+        AllConstantInt = false;
+        break;
+      }
+    if (!AllConstantInt)
+      break;
+
+    Ptr = cast<Constant>(GEP->getOperand(0));
+    Offset += APInt(BitWidth,
+                    TD->getIndexedOffset(Ptr->getType(),
+                                         (Value**)NestedOps.data(),
+                                         NestedOps.size()));
+    Ptr = cast<Constant>(Ptr->stripPointerCasts());
+  }
+
   // If the base value for this address is a literal integer value, fold the
   // getelementptr to the resulting integer value casted to the pointer type.
   if (BaseIsInt) {
@@ -600,7 +624,6 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
   // we eliminate over-indexing of the notional static type array bounds.
   // This makes it easy to determine if the getelementptr is "inbounds".
   // Also, this helps GlobalOpt do SROA on GlobalVariables.
-  Ptr = cast<Constant>(Ptr->stripPointerCasts());
   const Type *Ty = Ptr->getType();
   SmallVector<Constant*, 32> NewIdxs;
   do {
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 0f1f93b..5b8b534 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
 // instructions will be constant folded if the specified value is constant.
 //
 unsigned InlineCostAnalyzer::FunctionInfo::
-         CountCodeReductionForConstant(Value *V) {
+CountCodeReductionForConstant(Value *V) {
   unsigned Reduction = 0;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
     if (isa<BranchInst>(*UI) || isa<SwitchInst>(*UI)) {
@@ -31,7 +31,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
       const unsigned NumSucc = TI.getNumSuccessors();
       unsigned Instrs = 0;
       for (unsigned I = 0; I != NumSucc; ++I)
-        Instrs += TI.getSuccessor(I)->size();
+        Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)];
       // We don't know which blocks will be eliminated, so use the average size.
       Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
     } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
@@ -120,7 +120,7 @@ static bool callIsSmall(const Function *F) {
   StringRef Name = F->getName();
   
   // These will all likely lower to a single selection DAG node.
-  if (Name == "copysign" || Name == "copysignf" ||
+  if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
       Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
       Name == "sin" || Name == "sinf" || Name == "sinl" ||
       Name == "cos" || Name == "cosf" || Name == "cosl" ||
@@ -142,7 +142,7 @@ static bool callIsSmall(const Function *F) {
 /// from the specified block.
 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
   ++NumBlocks;
-
+  unsigned NumInstsBeforeThisBB = NumInsts;
   for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
        II != E; ++II) {
     if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
@@ -208,6 +208,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
   // function which is extremely undefined behavior.
   if (isa<IndirectBrInst>(BB->getTerminator()))
     NeverInline = true;
+
+  // Remember NumInsts for this BB.
+  NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
 }
 
 /// analyzeFunction - Fill in the current structure with information gleaned
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 2139c29..1001d2b 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -263,14 +263,7 @@ unsigned Loop::getSmallConstantTripMultiple() const {
 }
 
 /// isLCSSAForm - Return true if the Loop is in LCSSA form
-bool Loop::isLCSSAForm() const {
-  // Collect all the reachable blocks in the function, for fast lookups.
-  SmallPtrSet<BasicBlock *, 32> ReachableBBs;
-  BasicBlock *EntryBB = getHeader()->getParent()->begin();
-  for (df_iterator<BasicBlock *> NI = df_begin(EntryBB),
-       NE = df_end(EntryBB); NI != NE; ++NI)
-    ReachableBBs.insert(*NI);
-
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
   // Sort the blocks vector so that we can use binary search to do quick
   // lookups.
   SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
@@ -290,7 +283,7 @@ bool Loop::isLCSSAForm() const {
         // entry are special; uses in them don't need to go through PHIs.
         if (UserBB != BB &&
             !LoopBBs.count(UserBB) &&
-            ReachableBBs.count(UserBB))
+            DT.isReachableFromEntry(UserBB))
           return false;
       }
   }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 92cbb7c..5ae72f7 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -779,7 +779,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
     for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
       if (Tmp == 1) return Tmp;
       Tmp = std::min(Tmp,
-                     ComputeNumSignBits(PN->getIncomingValue(1), TD, Depth+1));
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
     }
     return Tmp;
   }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4978fba..2636e2c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -55,12 +55,13 @@ using namespace llvm;
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
 char AsmPrinter::ID = 0;
+
 AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
-                       MCContext &Ctx, MCStreamer &Streamer,
-                       const MCAsmInfo *T)
+                       MCStreamer &Streamer)
   : MachineFunctionPass(&ID), O(o),
-    TM(tm), MAI(T), TRI(tm.getRegisterInfo()),
-    OutContext(Ctx), OutStreamer(Streamer),
+    TM(tm), MAI(tm.getMCAsmInfo()), TRI(tm.getRegisterInfo()),
+    OutContext(Streamer.getContext()),
+    OutStreamer(Streamer),
     LastMI(0), LastFn(0), Counter(~0U), SetCounter(0), PrevDLT(NULL) {
   DW = 0; MMI = 0;
   VerboseAsm = Streamer.isVerboseAsm();
@@ -72,7 +73,6 @@ AsmPrinter::~AsmPrinter() {
     delete I->second;
   
   delete &OutStreamer;
-  delete &OutContext;
 }
 
 /// getFunctionNumber - Return a unique ID for the current function.
@@ -94,17 +94,21 @@ const MCSection *AsmPrinter::getCurrentSection() const {
 void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<MachineModuleInfo>();
   AU.addRequired<GCModuleInfo>();
   if (VerboseAsm)
     AU.addRequired<MachineLoopInfo>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MMI->AnalyzeModule(M);
+
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
   
-  Mang = new Mangler(*MAI);
+  Mang = new Mangler(OutContext, *TM.getTargetData());
   
   // Allow the target to emit any magic that it wants at the start of the file.
   EmitStartOfAsmFile(M);
@@ -128,9 +132,6 @@ bool AsmPrinter::doInitialization(Module &M) {
       << '\n' << MAI->getCommentString()
       << " End of file scope inline assembly\n";
 
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  if (MMI)
-    MMI->AnalyzeModule(M);
   DW = getAnalysisIfAvailable<DwarfWriter>();
   if (DW)
     DW->BeginModule(&M, MMI, O, this, MAI);
@@ -193,7 +194,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   if (EmitSpecialLLVMGlobal(GV))
     return;
 
-  MCSymbol *GVSym = GetGlobalValueSymbol(GV);
+  MCSymbol *GVSym = Mang->getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility());
 
   if (MAI->hasDotTypeDotSizeDirective())
@@ -306,6 +307,16 @@ void AsmPrinter::EmitFunctionHeader() {
   // do their wild and crazy things as required.
   EmitFunctionEntryLabel();
   
+  // If the function had address-taken blocks that got deleted, then we have
+  // references to the dangling symbols.  Emit them at the start of the function
+  // so that we don't get references to undefined symbols.
+  std::vector<MCSymbol*> DeadBlockSyms;
+  MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+  for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+    OutStreamer.AddComment("Address taken block that was later removed");
+    OutStreamer.EmitLabel(DeadBlockSyms[i]);
+  }
+  
   // Add some workaround for linkonce linkage on Cygwin\MinGW.
   if (MAI->getLinkOnceDirective() != 0 &&
       (F->hasLinkOnceLinkage() || F->hasWeakLinkage()))
@@ -477,14 +488,12 @@ bool AsmPrinter::doFinalization(Module &M) {
     for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
          I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
-                                      MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
     }
     
     for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
       if (!I->hasExternalWeakLinkage()) continue;
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(I),
-                                      MCSA_WeakReference);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
     }
   }
 
@@ -492,10 +501,10 @@ bool AsmPrinter::doFinalization(Module &M) {
     OutStreamer.AddBlankLine();
     for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
          I != E; ++I) {
-      MCSymbol *Name = GetGlobalValueSymbol(I);
+      MCSymbol *Name = Mang->getSymbol(I);
 
       const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
-      MCSymbol *Target = GetGlobalValueSymbol(GV);
+      MCSymbol *Target = Mang->getSymbol(GV);
 
       if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
         OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
@@ -539,7 +548,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   // Get the function symbol.
-  CurrentFnSym = GetGlobalValueSymbol(MF.getFunction());
+  CurrentFnSym = Mang->getSymbol(MF.getFunction());
 
   if (VerboseAsm)
     LI = &getAnalysis<MachineLoopInfo>();
@@ -651,6 +660,7 @@ void AsmPrinter::EmitConstantPool() {
 void AsmPrinter::EmitJumpTableInfo() {
   const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   if (MJTI == 0) return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
 
@@ -698,7 +708,7 @@ void AsmPrinter::EmitJumpTableInfo() {
         
         // .set LJTSet, LBB32-base
         const MCExpr *LHS =
-          MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+          MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
         OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
                                 MCBinaryExpr::CreateSub(LHS, Base, OutContext));
       }
@@ -727,6 +737,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                     unsigned UID) const {
   const MCExpr *Value = 0;
   switch (MJTI->getEntryKind()) {
+  case MachineJumpTableInfo::EK_Inline:
+    llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
   case MachineJumpTableInfo::EK_Custom32:
     Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
                                                               OutContext);
@@ -734,13 +746,13 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   case MachineJumpTableInfo::EK_BlockAddress:
     // EK_BlockAddress - Each entry is a plain address of block, e.g.:
     //     .word LBB123
-    Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
     break;
   case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
     // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
     // with a relocation as gp-relative, e.g.:
     //     .gprel32 LBB123
-    MCSymbol *MBBSym = MBB->getSymbol(OutContext);
+    MCSymbol *MBBSym = MBB->getSymbol();
     OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
     return;
   }
@@ -764,7 +776,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
       break;
     }
     // Otherwise, use the difference as the jump table entry.
-    Value = MCSymbolRefExpr::Create(MBB->getSymbol(OutContext), OutContext);
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
     const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
     Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
     break;
@@ -842,8 +854,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
     const GlobalValue *GV =
       dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
     if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
-      OutStreamer.EmitSymbolAttribute(GetGlobalValueSymbol(GV),
-                                      MCSA_NoDeadStrip);
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
   }
 }
 
@@ -960,7 +971,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
     return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
   
   if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
-    return MCSymbolRefExpr::Create(AP.GetGlobalValueSymbol(GV), Ctx);
+    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
   if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
     return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
   
@@ -1308,6 +1319,8 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI,
   if (!MAI || !DW || !MAI->doesSupportDebugInformation()
       || !DW->ShouldEmitDwarfDebug())
     return;
+  if (MI->getOpcode() == TargetOpcode::DBG_VALUE)
+    return;
   DebugLoc DL = MI->getDebugLoc();
   if (DL.isUnknown())
     return;
@@ -1499,7 +1512,7 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
           ++OpNo;  // Skip over the ID number.
 
           if (Modifier[0] == 'l')  // labels are target independent
-            O << *MI->getOperand(OpNo).getMBB()->getSymbol(OutContext);
+            O << *MI->getOperand(OpNo).getMBB()->getSymbol();
           else {
             AsmPrinter *AP = const_cast<AsmPrinter*>(this);
             if ((OpFlags & 7) == 4) {
@@ -1552,17 +1565,7 @@ void AsmPrinter::printKill(const MachineInstr *MI) const {
 /// printLabel - This method prints a local label used by debug and
 /// exception handling tables.
 void AsmPrinter::printLabelInst(const MachineInstr *MI) const {
-  MCSymbol *Sym = 
-    OutContext.GetOrCreateTemporarySymbol(Twine(MAI->getPrivateGlobalPrefix()) +
-                                 "label" + Twine(MI->getOperand(0).getImm()));
-  OutStreamer.EmitLabel(Sym);
-}
-
-void AsmPrinter::printLabel(unsigned Id) const {
-  MCSymbol *Sym = 
-    OutContext.GetOrCreateTemporarySymbol(Twine(MAI->getPrivateGlobalPrefix()) +
-                                          "label" + Twine(Id));
-  OutStreamer.EmitLabel(Sym);
+  OutStreamer.EmitLabel(MI->getOperand(0).getMCSymbol());
 }
 
 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
@@ -1582,28 +1585,11 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
-  return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock());
+  return MMI->getAddrLabelSymbol(BA->getBasicBlock());
 }
 
-MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
-                                            const BasicBlock *BB) const {
-  assert(BB->hasName() &&
-         "Address of anonymous basic block not supported yet!");
-
-  // This code must use the function name itself, and not the function number,
-  // since it must be possible to generate the label name from within other
-  // functions.
-  SmallString<60> FnName;
-  Mang->getNameWithPrefix(FnName, F, false);
-
-  // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
-  SmallString<60> NameResult;
-  Mang->getNameWithPrefix(NameResult,
-                          StringRef("BA") + Twine((unsigned)FnName.size()) + 
-                          "_" + FnName.str() + "_" + BB->getName(), 
-                          Mangler::Private);
-
-  return OutContext.GetOrCreateTemporarySymbol(NameResult.str());
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+  return MMI->getAddrLabelSymbol(BB);
 }
 
 /// GetCPISymbol - Return the symbol for the specified constant pool entry.
@@ -1626,17 +1612,6 @@ MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
    Twine(UID) + "_set_" + Twine(MBBID));
 }
 
-/// GetGlobalValueSymbol - Return the MCSymbol for the specified global
-/// value.
-MCSymbol *AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
-  SmallString<60> NameStr;
-  Mang->getNameWithPrefix(NameStr, GV, false);
-  
-  if (!GV->hasPrivateLinkage())
-    return OutContext.GetOrCreateSymbol(NameStr.str());
-  return OutContext.GetOrCreateTemporarySymbol(NameStr.str());
-}
-
 /// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
 /// global value name as its base, with the specified suffix, and where the
 /// symbol is forced to have private linkage if ForcePrivate is true.
@@ -1734,16 +1709,19 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
   if (unsigned Align = MBB->getAlignment())
     EmitAlignment(Log2_32(Align));
 
-  // If the block has its address taken, emit a special label to satisfy
-  // references to the block. This is done so that we don't need to
-  // remember the number of this label, and so that we can make
-  // forward references to labels without knowing what their numbers
-  // will be.
+  // If the block has its address taken, emit any labels that were used to
+  // reference the block.  It is possible that there is more than one label
+  // here, because multiple LLVM BB's may have been RAUW'd to this block after
+  // the references were generated.
   if (MBB->hasAddressTaken()) {
     const BasicBlock *BB = MBB->getBasicBlock();
     if (VerboseAsm)
-      OutStreamer.AddComment("Address Taken");
-    OutStreamer.EmitLabel(GetBlockAddressSymbol(BB->getParent(), BB));
+      OutStreamer.AddComment("Block address taken");
+    
+    std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+    for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+      OutStreamer.EmitLabel(Syms[i]);
   }
 
   // Print the main label for the block.
@@ -1766,7 +1744,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
       PrintBasicBlockLoopComments(*MBB, LI, *this);
     }
 
-    OutStreamer.EmitLabel(MBB->getSymbol(OutContext));
+    OutStreamer.EmitLabel(MBB->getSymbol());
   }
 }
 
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index e08d748..c5909fa 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -112,7 +112,6 @@ namespace llvm {
   //===--------------------------------------------------------------------===//
   /// DIE - A structured debug information entry.  Has an abbreviation which
   /// describes it's organization.
-  class CompileUnit;
   class DIEValue;
 
   class DIE {
@@ -159,7 +158,6 @@ namespace llvm {
     void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
     void setOffset(unsigned O) { Offset = O; }
     void setSize(unsigned S) { Size = S; }
-    void setParent(DIE *P) { Parent = P; }
     
     /// addValue - Add a value and attributes to a DIE.
     ///
@@ -185,7 +183,7 @@ namespace llvm {
       }
       Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
       Children.push_back(Child);
-      Child->setParent(this);
+      Child->Parent = this;
     }
 
 #ifndef NDEBUG
@@ -393,12 +391,11 @@ namespace llvm {
   /// this class can also be used as a proxy for a debug information entry not
   /// yet defined (ie. types.)
   class DIEEntry : public DIEValue {
-    DIE *Entry;
+    DIE *const Entry;
   public:
     explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
 
     DIE *getEntry() const { return Entry; }
-    void setEntry(DIE *E) { Entry = E; }
 
     /// EmitValue - Emit debug information entry offset.
     ///
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 36be5b9..866f457 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -52,9 +53,9 @@ class CompileUnit {
 
   /// Die - Compile unit debug information entry.
   ///
-  DIE *CUDie;
+  const OwningPtr<DIE> CUDie;
 
-  /// IndexTyDie - An anonymous type for index type.
+  /// IndexTyDie - An anonymous type for index type.  Owned by CUDie.
   DIE *IndexTyDie;
 
   /// GVToDieMap - Tracks the mapping of unit level debug informaton
@@ -78,11 +79,10 @@ class CompileUnit {
 public:
   CompileUnit(unsigned I, DIE *D)
     : ID(I), CUDie(D), IndexTyDie(0) {}
-  ~CompileUnit() { delete CUDie; delete IndexTyDie; }
 
   // Accessors.
   unsigned getID()                  const { return ID; }
-  DIE* getCUDie()                   const { return CUDie; }
+  DIE* getCUDie()                   const { return CUDie.get(); }
   const StringMap<DIE*> &getGlobals()     const { return Globals; }
   const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
 
@@ -148,16 +148,21 @@ public:
 class DbgVariable {
   DIVariable Var;                    // Variable Descriptor.
   unsigned FrameIndex;               // Variable frame index.
-  DbgVariable *AbstractVar;          // Abstract variable for this variable.
+  const MachineInstr *DbgValueMInsn; // DBG_VALUE
+  DbgVariable *const AbstractVar;    // Abstract variable for this variable.
   DIE *TheDIE;
 public:
-  DbgVariable(DIVariable V, unsigned I)
-    : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0)  {}
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar)
+    : Var(V), FrameIndex(I), DbgValueMInsn(0), AbstractVar(AbsVar), TheDIE(0) {}
+  DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar)
+    : Var(V), FrameIndex(0), DbgValueMInsn(MI), AbstractVar(AbsVar), TheDIE(0)
+    {}
 
   // Accessors.
   DIVariable getVariable()           const { return Var; }
   unsigned getFrameIndex()           const { return FrameIndex; }
-  void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+  const MachineInstr *getDbgValue()  const { return DbgValueMInsn; }
   DbgVariable *getAbstractVariable() const { return AbstractVar; }
   void setDIE(DIE *D)                      { TheDIE = D; }
   DIE *getDIE()                      const { return TheDIE; }
@@ -176,8 +181,10 @@ class DbgScope {
   MCSymbol *EndLabel;                 // Label ID of the end of scope.
   const MachineInstr *LastInsn;       // Last instruction of this scope.
   const MachineInstr *FirstInsn;      // First instruction of this scope.
-  SmallVector<DbgScope *, 4> Scopes;  // Scopes defined in scope.
-  SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
+  // Scopes defined in scope.  Contents not owned.
+  SmallVector<DbgScope *, 4> Scopes;
+  // Variables declared in scope.  Contents owned.
+  SmallVector<DbgVariable *, 8> Variables;
 
   // Private state for dump()
   mutable unsigned IndentLevel;
@@ -192,14 +199,12 @@ public:
   DbgScope *getParent()          const { return Parent; }
   void setParent(DbgScope *P)          { Parent = P; }
   DIDescriptor getDesc()         const { return Desc; }
-  MDNode *getInlinedAt()         const {
-    return InlinedAtLocation;
-  }
+  MDNode *getInlinedAt()         const { return InlinedAtLocation; }
   MDNode *getScopeNode()         const { return Desc.getNode(); }
   MCSymbol *getStartLabel()      const { return StartLabel; }
   MCSymbol *getEndLabel()        const { return EndLabel; }
-  SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
-  SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+  const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+  const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
   void setStartLabel(MCSymbol *S) { StartLabel = S; }
   void setEndLabel(MCSymbol *E)   { EndLabel = E; }
   void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
@@ -222,14 +227,14 @@ public:
     assert (getFirstInsn() && "First instruction is missing!");
     
     // Use the end of last child scope as end of this scope.
-    SmallVector<DbgScope *, 4> &Scopes = getScopes();
+    const SmallVector<DbgScope *, 4> &Scopes = getScopes();
     const MachineInstr *LastInsn = getFirstInsn();
     unsigned LIndex = 0;
     if (Scopes.empty()) {
       assert (getLastInsn() && "Inner most scope does not have last insn!");
       return;
     }
-    for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+    for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(),
            SE = Scopes.end(); SI != SE; ++SI) {
       DbgScope *DS = *SI;
       DS->fixInstructionMarkers(MIIndexMap);
@@ -281,8 +286,6 @@ void DbgScope::dump() const {
 #endif
 
 DbgScope::~DbgScope() {
-  for (unsigned i = 0, N = Scopes.size(); i < N; ++i)
-    delete Scopes[i];
   for (unsigned j = 0, M = Variables.size(); j < M; ++j)
     delete Variables[j];
 }
@@ -292,9 +295,9 @@ DbgScope::~DbgScope() {
 DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
   : DwarfPrinter(OS, A, T), ModuleCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
-    DIEValues(), StringPool(),
-    SectionSourceLines(), didInitial(false), shouldEmit(false),
+    DIEValues(), SectionSourceLines(), didInitial(false), shouldEmit(false),
     CurrentFnDbgScope(0), DebugTimer(0) {
+  NextStringPoolNumber = 0;
   if (TimePassesIsEnabled)
     DebugTimer = new Timer("Dwarf Debug Writer");
 }
@@ -305,6 +308,15 @@ DwarfDebug::~DwarfDebug() {
   delete DebugTimer;
 }
 
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+  if (Entry.first) return Entry.first;
+
+  Entry.second = NextStringPoolNumber++;
+  return Entry.first = getDWLabel("string", Entry.second);
+}
+
+
 /// assignAbbrevNumber - Define a unique number for the abbreviation.
 ///
 void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -820,14 +832,13 @@ void DwarfDebug::addType(DIE *Entity, DIType Ty) {
     return;
   }
 
-  // Set up proxy.
-  Entry = createDIEEntry();
-  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
-
   // Construct type.
   DIE *Buffer = getOrCreateTypeDIE(Ty);
 
-  Entry->setEntry(Buffer);
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
+
   Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
 }
 
@@ -1315,24 +1326,25 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
 /// If there are global variables in this scope then create and insert
 /// DIEs for these variables.
 DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
-
  DIE *SPDie = ModuleCU->getDIE(SPNode);
- assert (SPDie && "Unable to find subprogram DIE!");
+ assert(SPDie && "Unable to find subprogram DIE!");
  DISubprogram SP(SPNode);
+  
  // There is not any need to generate specification DIE for a function
  // defined at compile unit level. If a function is defined inside another
  // function then gdb prefers the definition at top level and but does not
  // expect specification DIE in parent function. So avoid creating 
  // specification DIE for a function defined inside a function.
- if (SP.isDefinition() && !SP.getContext().isCompileUnit()
-     && !SP.getContext().isSubprogram()) {
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+     !SP.getContext().isFile() && !SP.getContext().isSubprogram()) {
    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-  // Add arguments. 
+   
+   // Add arguments. 
    DICompositeType SPTy = SP.getType();
    DIArray Args = SPTy.getTypeArray();
    unsigned SPTag = SPTy.getTag();
    if (SPTag == dwarf::DW_TAG_subroutine_type)
-     for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
+     for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
        DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
        addType(Arg, ATy);
@@ -1421,9 +1433,6 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
   } else
     I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
 
-  StringPool.insert(InlinedSP.getName());
-  StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
-
   DILocation DL(Scope->getInlinedAt());
   addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
   addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
@@ -1489,17 +1498,41 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
 
   // Add variable address.
   if (!Scope->isAbstractScope()) {
-    MachineLocation Location;
-    unsigned FrameReg;
-    int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
-    Location.set(FrameReg, Offset);
-
-    if (VD.hasComplexAddress())
-      addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-    else if (VD.isBlockByrefVariable())
-      addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-    else
-      addAddress(VariableDie, dwarf::DW_AT_location, Location);
+    // Check if variable is described by DBG_VALUE instruction.
+    if (const MachineInstr *DbgValueInsn = DV->getDbgValue()) {
+      if (DbgValueInsn->getNumOperands() == 3) {
+        // FIXME : Handle getNumOperands != 3 
+        if (DbgValueInsn->getOperand(0).getType() 
+            == MachineOperand::MO_Register
+            && DbgValueInsn->getOperand(0).getReg()) {
+          MachineLocation Location;
+          Location.set(DbgValueInsn->getOperand(0).getReg());
+          addAddress(VariableDie, dwarf::DW_AT_location, Location);
+        } else if (DbgValueInsn->getOperand(0).getType() == 
+                   MachineOperand::MO_Immediate) {
+          DIEBlock *Block = new DIEBlock();
+          unsigned Imm = DbgValueInsn->getOperand(0).getImm();
+          addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
+          addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
+        } else {
+          //FIXME : Handle other operand types.
+          delete VariableDie;
+          return NULL;
+        }
+      } 
+    } else {
+      MachineLocation Location;
+      unsigned FrameReg;
+      int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+      Location.set(FrameReg, Offset);
+      
+      if (VD.hasComplexAddress())
+        addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      else if (VD.isBlockByrefVariable())
+        addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+      else
+        addAddress(VariableDie, dwarf::DW_AT_location, Location);
+    }
   }
 
   if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
@@ -1549,7 +1582,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   }
   
   // Add variables to scope.
-  SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+  const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
     DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
     if (VariableDIE)
@@ -1557,7 +1590,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   }
 
   // Add nested scopes.
-  SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+  const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
   for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
     // Define the Scope debug information entry.
     DIE *NestedDIE = constructScopeDIE(Scopes[j]);
@@ -1622,8 +1655,12 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
   return NDie;
 }
 
-CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
+void DwarfDebug::constructCompileUnit(MDNode *N) {
   DICompileUnit DIUnit(N);
+  // Use first compile unit marked as isMain as the compile unit for this
+  // module.
+  if (ModuleCU || !DIUnit.isMain())
+    return;
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
   unsigned ID = GetOrCreateSourceID(Dir, FN);
@@ -1653,14 +1690,9 @@ CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
     addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
             dwarf::DW_FORM_data1, RVer);
 
-  CompileUnit *Unit = new CompileUnit(ID, Die);
-  if (!ModuleCU && DIUnit.isMain()) {
-    // Use first compile unit marked as isMain as the compile unit
-    // for this module.
-    ModuleCU = Unit;
-  }
-
-  return Unit;
+  assert(!ModuleCU &&
+         "ModuleCU assigned since the top of constructCompileUnit");
+  ModuleCU = new CompileUnit(ID, Die);
 }
 
 void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
@@ -1686,7 +1718,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   // Do not create specification DIE if context is either compile unit
   // or a subprogram.
   if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
-      && !GVContext.isSubprogram()) {
+      && !GVContext.isFile() && !GVContext.isSubprogram()) {
     // Create specification DIE.
     DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
     addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1694,7 +1726,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     DIEBlock *Block = new DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
     addLabel(Block, 0, dwarf::DW_FORM_udata,
-             Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+             Asm->Mang->getSymbol(DI_GV.getGlobal()));
     addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
     addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
     ModuleCU->addDie(VariableSpecDIE);
@@ -1702,7 +1734,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
     DIEBlock *Block = new DIEBlock();
     addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
     addLabel(Block, 0, dwarf::DW_FORM_udata,
-             Asm->GetGlobalValueSymbol(DI_GV.getGlobal()));
+             Asm->Mang->getSymbol(DI_GV.getGlobal()));
     addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
   }
   addToContextOwner(VariableDie, GVContext);
@@ -1880,9 +1912,6 @@ void DwarfDebug::endModule() {
   // Emit info into a debug pubtypes section.
   emitDebugPubTypes();
 
-  // Emit info into a debug str section.
-  emitDebugStr();
-
   // Emit info into a debug loc section.
   emitDebugLoc();
 
@@ -1898,6 +1927,12 @@ void DwarfDebug::endModule() {
   // Emit inline info.
   emitDebugInlineInfo();
 
+  // Emit info into a debug str section.
+  emitDebugStr();
+  
+  delete ModuleCU;
+  ModuleCU = NULL;  // Reset for the next Module, if any.
+
   if (TimePassesIsEnabled)
     DebugTimer->stopTimer();
 }
@@ -1915,7 +1950,29 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
   if (!Scope)
     return NULL;
 
-  AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+  AbsDbgVariable = new DbgVariable(Var, FrameIdx,
+                                   NULL /* No more-abstract variable*/);
+  Scope->addVariable(AbsDbgVariable);
+  AbstractVariables[Var.getNode()] = AbsDbgVariable;
+  return AbsDbgVariable;
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+/// FIXME : Refactor findAbstractVariable.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+                                              const MachineInstr *MI,
+                                              DILocation &ScopeLoc) {
+
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+  if (AbsDbgVariable)
+    return AbsDbgVariable;
+
+  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+  if (!Scope)
+    return NULL;
+
+  AbsDbgVariable = new DbgVariable(Var, MI, 
+                                   NULL /* No more-abstract variable*/);
   Scope->addVariable(AbsDbgVariable);
   AbstractVariables[Var.getNode()] = AbsDbgVariable;
   return AbsDbgVariable;
@@ -1942,11 +1999,46 @@ void DwarfDebug::collectVariableInfo() {
     if (!Scope)
       continue;
 
-    DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc);
+    DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable);
     Scope->addVariable(RegVar);
-    if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first,
-                                                           ScopeLoc))
-      RegVar->setAbstractVariable(AbsDbgVariable);
+  }
+
+  // Collect variable information from DBG_VALUE machine instructions.
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      if (MInsn->getOpcode() != TargetOpcode::DBG_VALUE)
+        continue;
+      // FIXME : Lift this restriction.
+      if (MInsn->getNumOperands() != 3)
+        continue;
+      DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata()));
+      if (DV.getTag() == dwarf::DW_TAG_arg_variable)  {
+        // FIXME Handle inlined subroutine arguments.
+        DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
+        CurrentFnDbgScope->addVariable(ArgVar);
+        continue;
+      }
+
+      DebugLoc DL = MInsn->getDebugLoc();
+      if (DL.isUnknown()) continue;
+      DILocation ScopeLoc = MF->getDILocation(DL);
+      DbgScope *Scope =
+        ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
+      if (!Scope)
+        Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+      // If variable scope is not found then skip this variable.
+      if (!Scope)
+        continue;
+
+      DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn,
+                                                         ScopeLoc);
+      DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
+      Scope->addVariable(RegVar);
+    }
   }
 }
 
@@ -1967,7 +2059,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
   if (I == DbgScopeEndMap.end())
     return;
 
-  MCSymbol *Label = getDWLabel("label", MMI->NextLabelID());
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
   Asm->OutStreamer.EmitLabel(Label);
 
   SmallVector<DbgScope*, 2> &SD = I->second;
@@ -2017,6 +2109,8 @@ bool DwarfDebug::extractScopeInformation() {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      // FIXME : Remove DBG_VALUE check.
+      if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue;
       MIIndexMap[MInsn] = MIIndex++;
       DebugLoc DL = MInsn->getDebugLoc();
       if (DL.isUnknown()) continue;
@@ -2037,6 +2131,8 @@ bool DwarfDebug::extractScopeInformation() {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      // FIXME : Remove DBG_VALUE check.
+      if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue;
       DebugLoc DL = MInsn->getDebugLoc();
       if (DL.isUnknown())  continue;
       DILocation DLT = MF->getDILocation(DL);
@@ -2065,9 +2161,9 @@ bool DwarfDebug::extractScopeInformation() {
   while (!WorkList.empty()) {
     DbgScope *S = WorkList.back(); WorkList.pop_back();
 
-    SmallVector<DbgScope *, 4> &Children = S->getScopes();
+    const SmallVector<DbgScope *, 4> &Children = S->getScopes();
     if (!Children.empty()) 
-      for (SmallVector<DbgScope *, 4>::iterator SI = Children.begin(),
+      for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
              SE = Children.end(); SI != SE; ++SI)
         WorkList.push_back(*SI);
 
@@ -2172,11 +2268,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
 
   // Clear debug info
   CurrentFnDbgScope = NULL;
-  DbgScopeMap.clear();
+  DeleteContainerSeconds(DbgScopeMap);
   DbgScopeBeginMap.clear();
   DbgScopeEndMap.clear();
   ConcreteScopes.clear();
+  DeleteContainerSeconds(AbstractScopes);
   AbstractScopesList.clear();
+  AbstractVariables.clear();
   Lines.clear();
   
   if (TimePassesIsEnabled)
@@ -2210,16 +2308,15 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
     Dir = DB.getDirectory();
     Fn = DB.getFilename();
   } else
-    assert (0 && "Unexpected scope info");
+    assert(0 && "Unexpected scope info");
 
   unsigned Src = GetOrCreateSourceID(Dir, Fn);
-  unsigned ID = MMI->NextLabelID();
-  Lines.push_back(SrcLineInfo(Line, Col, Src, ID));
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+  Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
 
   if (TimePassesIsEnabled)
     DebugTimer->stopTimer();
 
-  MCSymbol *Label = getDWLabel("label", ID);
   Asm->OutStreamer.EmitLabel(Label);
   return Label;
 }
@@ -2607,8 +2704,8 @@ void DwarfDebug::emitDebugLines() {
     // Construct rows of the address, source, line, column matrix.
     for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
       const SrcLineInfo &LineInfo = LineInfos[i];
-      unsigned LabelID = LineInfo.getLabelID();
-      if (MMI->isLabelDeleted(LabelID)) continue;
+      MCSymbol *Label = LineInfo.getLabel();
+      if (!Label->isDefined()) continue; // Not emitted, in dead code.
 
       if (LineInfo.getLine() == 0) continue;
 
@@ -2631,8 +2728,8 @@ void DwarfDebug::emitDebugLines() {
       Asm->EmitInt8(dwarf::DW_LNE_set_address); 
 
       Asm->OutStreamer.AddComment("Location label");
-      Asm->OutStreamer.EmitSymbolValue(getDWLabel("label", LabelID),
-                                       TD->getPointerSize(), 0/*AddrSpace*/);
+      Asm->OutStreamer.EmitSymbolValue(Label, TD->getPointerSize(),
+                                       0/*AddrSpace*/);
       
       // If change of source, then switch to the new source.
       if (Source != LineInfo.getSourceID()) {
@@ -2718,7 +2815,7 @@ void DwarfDebug::emitCommonDebugFrame() {
   std::vector<MachineMove> Moves;
   RI->getInitialFrameState(Moves);
 
-  EmitFrameMoves(NULL, 0, Moves, false);
+  EmitFrameMoves(0, Moves, false);
 
   Asm->EmitAlignment(2, 0, 0, false);
   Asm->OutStreamer.EmitLabel(getTempLabel("debug_frame_common_end"));
@@ -2726,8 +2823,8 @@ void DwarfDebug::emitCommonDebugFrame() {
 
 /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
 /// section.
-void
-DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
+void DwarfDebug::
+emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
   if (!MAI->doesDwarfRequireFrameSection())
     return;
 
@@ -2736,33 +2833,31 @@ DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
                               Asm->getObjFileLowering().getDwarfFrameSection());
 
   Asm->OutStreamer.AddComment("Length of Frame Information Entry");
-  EmitDifference(getDWLabel("debug_frame_end", DebugFrameInfo.Number),
-                 getDWLabel("debug_frame_begin", DebugFrameInfo.Number), true);
+  MCSymbol *DebugFrameBegin =
+    getDWLabel("debug_frame_begin", DebugFrameInfo.Number);
+  MCSymbol *DebugFrameEnd =
+    getDWLabel("debug_frame_end", DebugFrameInfo.Number);
+  EmitDifference(DebugFrameEnd, DebugFrameBegin, true);
 
-  Asm->OutStreamer.EmitLabel(getDWLabel("debug_frame_begin",
-                                        DebugFrameInfo.Number));
+  Asm->OutStreamer.EmitLabel(DebugFrameBegin);
 
   Asm->OutStreamer.AddComment("FDE CIE offset");
   EmitSectionOffset(getTempLabel("debug_frame_common"),
                     getTempLabel("section_debug_frame"), true, false);
 
   Asm->OutStreamer.AddComment("FDE initial location");
-  Asm->OutStreamer.EmitSymbolValue(getDWLabel("func_begin",
-                                              DebugFrameInfo.Number),
+  MCSymbol *FuncBeginSym = getDWLabel("func_begin", DebugFrameInfo.Number);
+  Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
                                    TD->getPointerSize(), 0/*AddrSpace*/);
   
   
-  
   Asm->OutStreamer.AddComment("FDE address range");
-  EmitDifference(getDWLabel("func_end", DebugFrameInfo.Number),
-                 getDWLabel("func_begin", DebugFrameInfo.Number));
+  EmitDifference(getDWLabel("func_end", DebugFrameInfo.Number), FuncBeginSym);
 
-  EmitFrameMoves("func_begin", DebugFrameInfo.Number, DebugFrameInfo.Moves,
-                 false);
+  EmitFrameMoves(FuncBeginSym, DebugFrameInfo.Moves, false);
 
   Asm->EmitAlignment(2, 0, 0, false);
-  Asm->OutStreamer.EmitLabel(getDWLabel("debug_frame_end",
-                                        DebugFrameInfo.Number));
+  Asm->OutStreamer.EmitLabel(DebugFrameEnd);
 }
 
 /// emitDebugPubNames - Emit visible names into a debug pubnames section.
@@ -2860,14 +2955,23 @@ void DwarfDebug::emitDebugStr() {
   Asm->OutStreamer.SwitchSection(
                                 Asm->getObjFileLowering().getDwarfStrSection());
 
-  // For each of strings in the string pool.
-  for (unsigned StringID = 1, N = StringPool.size(); StringID <= N; ++StringID){
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<std::pair<unsigned, 
+      StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+  
+  for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+       I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &*I));
+  
+  array_pod_sort(Entries.begin(), Entries.end());
+  
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
     // Emit a label for reference from debug information entries.
-    Asm->OutStreamer.EmitLabel(getDWLabel("string", StringID));
+    Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
     
     // Emit the string itself.
-    const std::string &String = StringPool[StringID];
-    Asm->OutStreamer.EmitBytes(StringRef(String.c_str(), String.size()+1), 0);
+    Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
   }
 }
 
@@ -2960,13 +3064,12 @@ void DwarfDebug::emitDebugInlineInfo() {
       Asm->OutStreamer.EmitBytes(Name, 0);
       Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
     } else 
-      EmitSectionOffset(getDWLabel("string",
-                                   StringPool.idFor(getRealLinkageName(LName))),
+      EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
                         getTempLabel("section_str"), true);
 
     Asm->OutStreamer.AddComment("Function name");
-    EmitSectionOffset(getDWLabel("string", StringPool.idFor(Name)),
-                      getTempLabel("section_str"), false, true);
+    EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"),
+                      false, true);
     EmitULEB128(Labels.size(), "Inline count");
 
     for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 40d1d64..d6634e1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -45,16 +45,16 @@ class SrcLineInfo {
   unsigned Line;                     // Source line number.
   unsigned Column;                   // Source column.
   unsigned SourceID;                 // Source ID number.
-  unsigned LabelID;                  // Label in code ID number.
+  MCSymbol *Label;                   // Label in code.
 public:
-  SrcLineInfo(unsigned L, unsigned C, unsigned S, unsigned I)
-    : Line(L), Column(C), SourceID(S), LabelID(I) {}
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+    : Line(L), Column(C), SourceID(S), Label(label) {}
 
   // Accessors
   unsigned getLine() const { return Line; }
   unsigned getColumn() const { return Column; }
   unsigned getSourceID() const { return SourceID; }
-  unsigned getLabelID() const { return LabelID; }
+  MCSymbol *getLabel() const { return Label; }
 };
 
 class DwarfDebug : public DwarfPrinter {
@@ -102,9 +102,12 @@ class DwarfDebug : public DwarfPrinter {
   ///
   std::vector<DIEValue *> DIEValues;
 
-  /// StringPool - A UniqueVector of strings used by indirect references.
-  ///
-  UniqueVector<std::string> StringPool;
+  /// StringPool - A String->Symbol mapping of strings used by indirect
+  /// references.
+  StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+  unsigned NextStringPoolNumber;
+  
+  MCSymbol *getStringPoolEntry(StringRef Str);
 
   /// SectionMap - Provides a unique id per text section.
   ///
@@ -126,7 +129,8 @@ class DwarfDebug : public DwarfPrinter {
   //
   DbgScope *CurrentFnDbgScope;
   
-  /// DbgScopeMap - Tracks the scopes in the current function.
+  /// DbgScopeMap - Tracks the scopes in the current function.  Owns the
+  /// contained DbgScope*s.
   ///
   DenseMap<MDNode *, DbgScope *> DbgScopeMap;
 
@@ -135,11 +139,12 @@ class DwarfDebug : public DwarfPrinter {
   DenseMap<MDNode *, DbgScope *> ConcreteScopes;
 
   /// AbstractScopes - Tracks the abstract scopes a module. These scopes are
-  /// not included DbgScopeMap.
+  /// not included in DbgScopeMap.  AbstractScopes owns its DbgScope*s.
   DenseMap<MDNode *, DbgScope *> AbstractScopes;
   SmallVector<DbgScope *, 4>AbstractScopesList;
 
-  /// AbstractVariables - Collection on abstract variables.
+  /// AbstractVariables - Collection of abstract variables.  Owned by the
+  /// DbgScopes in AbstractScopes.
   DenseMap<MDNode *, DbgVariable *> AbstractVariables;
 
   /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
@@ -225,7 +230,7 @@ class DwarfDebug : public DwarfPrinter {
 
   /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
   /// information entry.
-  DIEEntry *createDIEEntry(DIE *Entry = NULL);
+  DIEEntry *createDIEEntry(DIE *Entry);
 
   /// addUInt - Add an unsigned integer attribute data and value.
   ///
@@ -356,6 +361,8 @@ class DwarfDebug : public DwarfPrinter {
   /// findAbstractVariable - Find abstract variable associated with Var.
   DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, 
                                     DILocation &Loc);
+  DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI,
+                                    DILocation &Loc);
 
   /// updateSubprogramScopeDIE - Find DIE for the given subprogram and 
   /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
@@ -473,7 +480,7 @@ class DwarfDebug : public DwarfPrinter {
   /// as well.
   unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
 
-  CompileUnit *constructCompileUnit(MDNode *N);
+  void constructCompileUnit(MDNode *N);
 
   void constructGlobalVariableDIE(MDNode *N);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 11a01fe..4946b4c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -50,25 +50,6 @@ DwarfException::~DwarfException() {
   delete ExceptionTimer;
 }
 
-/// CreateLabelDiff - Emit a label and subtract it from the expression we
-/// already have.  This is equivalent to emitting "foo - .", but we have to emit
-/// the label for "." directly.
-const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
-                                              const char *LabelName,
-                                              unsigned Index) {
-  SmallString<64> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
-                            << LabelName << Asm->getFunctionNumber()
-                            << "_" << Index;
-  MCSymbol *DotSym = Asm->OutContext.GetOrCreateTemporarySymbol(Name.str());
-  Asm->OutStreamer.EmitLabel(DotSym);
-
-  return MCBinaryExpr::CreateSub(ExprRef,
-                                 MCSymbolRefExpr::Create(DotSym,
-                                                         Asm->OutContext),
-                                 Asm->OutContext);
-}
-
 /// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
 /// is shared among many Frame Description Entries.  There is at least one CIE
 /// in every non-empty .debug_frame section.
@@ -169,7 +150,7 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
   // Indicate locations of general callee saved registers in frame.
   std::vector<MachineMove> Moves;
   RI->getInitialFrameState(Moves);
-  EmitFrameMoves(NULL, 0, Moves, true);
+  EmitFrameMoves(0, Moves, true);
 
   // On Darwin the linker honors the alignment of eh_frame, which means it must
   // be 8-byte on 64-bit targets to match what gcc does.  Otherwise you get
@@ -240,12 +221,13 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
                                  EHFrameInfo.PersonalityIndex),
                       true, true);
 
+    MCSymbol *EHFuncBeginSym = getDWLabel("eh_func_begin", EHFrameInfo.Number);
 
     Asm->OutStreamer.AddComment("FDE initial location");
-    EmitReference(getDWLabel("eh_func_begin", EHFrameInfo.Number), FDEEncoding);
+    EmitReference(EHFuncBeginSym, FDEEncoding);
+    
     Asm->OutStreamer.AddComment("FDE address range");
-    EmitDifference(getDWLabel("eh_func_end", EHFrameInfo.Number),
-                   getDWLabel("eh_func_begin", EHFrameInfo.Number),
+    EmitDifference(getDWLabel("eh_func_end", EHFrameInfo.Number),EHFuncBeginSym,
                    SizeOfEncodedValue(FDEEncoding) == 4);
 
     // If there is a personality and landing pads then point to the language
@@ -265,8 +247,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
     }
 
     // Indicate locations of function specific callee saved registers in frame.
-    EmitFrameMoves("eh_func_begin", EHFrameInfo.Number, EHFrameInfo.Moves,
-                   true);
+    EmitFrameMoves(EHFuncBeginSym, EHFrameInfo.Moves, true);
 
     // On Darwin the linker honors the alignment of eh_frame, which means it
     // must be 8-byte on 64-bit targets to match what gcc does.  Otherwise you
@@ -473,7 +454,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
                      const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      const SmallVectorImpl<unsigned> &FirstActions) {
   // The end label of the previous invoke or nounwind try-range.
-  unsigned LastLabel = 0;
+  MCSymbol *LastLabel = 0;
 
   // Whether there is a potentially throwing instruction (currently this means
   // an ordinary call) between the end of the previous try-range and now.
@@ -490,14 +471,11 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
       if (!MI->isLabel()) {
         if (MI->getDesc().isCall())
           SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
-
         continue;
       }
 
-      unsigned BeginLabel = MI->getOperand(0).getImm();
-      assert(BeginLabel && "Invalid label!");
-
       // End of the previous try-range?
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
       if (BeginLabel == LastLabel)
         SawPotentiallyThrowing = false;
 
@@ -599,7 +577,6 @@ void DwarfException::EmitExceptionTable() {
   const std::vector<GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
   const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
   const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
-  if (PadInfos.empty()) return;
 
   // Sort the landing pads in order of their type ids.  This is used to fold
   // duplicate actions.
@@ -624,7 +601,7 @@ void DwarfException::EmitExceptionTable() {
   for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
     const LandingPadInfo *LandingPad = LandingPads[i];
     for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
-      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
       assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
       PadRange P = { i, j };
       PadMap[BeginLabel] = P;
@@ -809,45 +786,33 @@ void DwarfException::EmitExceptionTable() {
     for (SmallVectorImpl<CallSiteEntry>::const_iterator
          I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
       const CallSiteEntry &S = *I;
-      const char *BeginTag;
-      unsigned BeginNumber;
-
-      if (!S.BeginLabel) {
-        BeginTag = "eh_func_begin";
-        BeginNumber = SubprogramCount;
-      } else {
-        BeginTag = "label";
-        BeginNumber = S.BeginLabel;
-      }
-
+      
+      MCSymbol *EHFuncBeginSym = getDWLabel("eh_func_begin", SubprogramCount);
+      
+      MCSymbol *BeginLabel = S.BeginLabel;
+      if (BeginLabel == 0)
+        BeginLabel = EHFuncBeginSym;
+      MCSymbol *EndLabel = S.EndLabel;
+      if (EndLabel == 0)
+        EndLabel = getDWLabel("eh_func_end", SubprogramCount);
+        
       // Offset of the call site relative to the previous call site, counted in
       // number of 16-byte bundles. The first call site is counted relative to
       // the start of the procedure fragment.
       Asm->OutStreamer.AddComment("Region start");
-      EmitSectionOffset(getDWLabel(BeginTag, BeginNumber),
-                        getDWLabel("eh_func_begin", SubprogramCount),
-                        true, true);
-
+      EmitSectionOffset(BeginLabel, EHFuncBeginSym, true, true);
+      
       Asm->OutStreamer.AddComment("Region length");
-      if (!S.EndLabel)
-        EmitDifference(getDWLabel("eh_func_end", SubprogramCount),
-                       getDWLabel(BeginTag, BeginNumber),
-                       true);
-      else
-        EmitDifference(getDWLabel("label", S.EndLabel), 
-                       getDWLabel(BeginTag, BeginNumber), true);
+      EmitDifference(EndLabel, BeginLabel, true);
 
 
       // Offset of the landing pad, counted in 16-byte bundles relative to the
       // @LPStart address.
       Asm->OutStreamer.AddComment("Landing pad");
-      if (!S.PadLabel) {
+      if (!S.PadLabel)
         Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
-      } else {
-        EmitSectionOffset(getDWLabel("label", S.PadLabel),
-                          getDWLabel("eh_func_begin", SubprogramCount),
-                          true, true);
-      }
+      else
+        EmitSectionOffset(S.PadLabel, EHFuncBeginSym, true, true);
 
       // Offset of the first associated action record, relative to the start of
       // the action table. This value is biased by 1 (1 indicates the start of
@@ -947,16 +912,11 @@ void DwarfException::BeginFunction(const MachineFunction *MF) {
   this->MF = MF;
   shouldEmitTable = shouldEmitMoves = false;
 
-  // Map all labels and get rid of any dead landing pads.
-  MMI->TidyLandingPads();
-
   // If any landing pads survive, we need an EH table.
-  if (!MMI->getLandingPads().empty())
-    shouldEmitTable = true;
+  shouldEmitTable = !MMI->getLandingPads().empty();
 
   // See if we need frame move info.
-  if (!MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
-    shouldEmitMoves = true;
+  shouldEmitMoves = !MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
 
   if (shouldEmitMoves || shouldEmitTable)
     // Assumes in correct section after the entry point.
@@ -978,7 +938,16 @@ void DwarfException::EndFunction() {
     ExceptionTimer->startTimer();
 
   Asm->OutStreamer.EmitLabel(getDWLabel("eh_func_end", SubprogramCount));
-  EmitExceptionTable();
+
+  // Record if this personality index uses a landing pad.
+  bool HasLandingPad = !MMI->getLandingPads().empty();
+  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+  
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (HasLandingPad)
+    EmitExceptionTable();
 
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
   MCSymbol *FunctionEHSym =
@@ -993,9 +962,6 @@ void DwarfException::EndFunction() {
                                          MMI->getFrameMoves(),
                                          MF->getFunction()));
 
-  // Record if this personality index uses a landing pad.
-  UsesLSDA[MMI->getPersonalityIndex()] |= !MMI->getLandingPads().empty();
-
   if (TimePassesIsEnabled)
     ExceptionTimer->stopTimer();
 }
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 3db1a00..4bc4a458 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -111,13 +111,6 @@ class DwarfException : public DwarfPrinter {
   /// PadLT - Order landing pads lexicographically by type id.
   static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
 
-  struct KeyInfo {
-    static inline unsigned getEmptyKey() { return -1U; }
-    static inline unsigned getTombstoneKey() { return -2U; }
-    static unsigned getHashValue(const unsigned &Key) { return Key; }
-    static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
-  };
-
   /// PadRange - Structure holding a try-range and the associated landing pad.
   struct PadRange {
     // The index of the landing pad.
@@ -126,7 +119,7 @@ class DwarfException : public DwarfPrinter {
     unsigned RangeIndex;
   };
 
-  typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
 
   /// ActionEntry - Structure describing an entry in the actions table.
   struct ActionEntry {
@@ -138,11 +131,11 @@ class DwarfException : public DwarfPrinter {
   /// CallSiteEntry - Structure describing an entry in the call-site table.
   struct CallSiteEntry {
     // The 'try-range' is BeginLabel .. EndLabel.
-    unsigned BeginLabel; // zero indicates the start of the function.
-    unsigned EndLabel;   // zero indicates the end of the function.
+    MCSymbol *BeginLabel; // null indicates the start of the function.
+    MCSymbol *EndLabel;   // null indicates the end of the function.
 
     // The landing pad starts at PadLabel.
-    unsigned PadLabel;   // zero indicates that there is no landing pad.
+    MCSymbol *PadLabel;   // null indicates that there is no landing pad.
     unsigned Action;
   };
 
@@ -169,11 +162,6 @@ class DwarfException : public DwarfPrinter {
                             const SmallVectorImpl<unsigned> &FirstActions);
   void EmitExceptionTable();
 
-  /// CreateLabelDiff - Emit a label and subtract it from the expression we
-  /// already have.  This is equivalent to emitting "foo - .", but we have to
-  /// emit the label for "." directly.
-  const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
-                                unsigned Index);
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index 7890e5c..e212696 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -189,7 +189,9 @@ void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc,
 void DwarfPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
-  const MCExpr *Exp = TLOF.getSymbolForDwarfReference(Sym, Asm->MMI, Encoding);
+  const MCExpr *Exp = TLOF.getExprForDwarfReference(Sym, Asm->Mang,
+                                                    Asm->MMI, Encoding,
+                                                    Asm->OutStreamer);
   Asm->OutStreamer.EmitValue(Exp, SizeOfEncodedValue(Encoding), /*addrspace*/0);
 }
 
@@ -197,7 +199,8 @@ void DwarfPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
   const MCExpr *Exp =
-    TLOF.getSymbolForDwarfGlobalReference(GV, Asm->Mang, Asm->MMI, Encoding);
+    TLOF.getExprForDwarfGlobalReference(GV, Asm->Mang, Asm->MMI, Encoding,
+                                        Asm->OutStreamer);
   Asm->OutStreamer.EmitValue(Exp, SizeOfEncodedValue(Encoding), /*addrspace*/0);
 }
 
@@ -222,10 +225,11 @@ void DwarfPrinter::EmitSectionOffset(const MCSymbol *Label,
     return EmitDifference(Label, Section, IsSmall);
   
   // On COFF targets, we have to emit the weird .secrel32 directive.
-  if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective())
+  if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
     // FIXME: MCize.
     Asm->O << SecOffDir << Label->getName();
-  else {
+    Asm->OutStreamer.AddBlankLine();
+  } else {
     unsigned Size = IsSmall ? 4 : TD->getPointerSize();
     Asm->OutStreamer.EmitSymbolValue(Label, Size, 0/*AddrSpace*/);
   }
@@ -233,34 +237,31 @@ void DwarfPrinter::EmitSectionOffset(const MCSymbol *Label,
 
 /// EmitFrameMoves - Emit frame instructions to describe the layout of the
 /// frame.
-void DwarfPrinter::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+void DwarfPrinter::EmitFrameMoves(MCSymbol *BaseLabel,
                                   const std::vector<MachineMove> &Moves,
                                   bool isEH) {
-  int stackGrowth =
-    Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
-    TargetFrameInfo::StackGrowsUp ?
-    TD->getPointerSize() : -TD->getPointerSize();
-  bool IsLocal = BaseLabel && strcmp(BaseLabel, "label") == 0;
-
+  int stackGrowth = TD->getPointerSize();
+  if (Asm->TM.getFrameInfo()->getStackGrowthDirection() !=
+      TargetFrameInfo::StackGrowsUp)
+    stackGrowth *= -1;
+  
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
     const MachineMove &Move = Moves[i];
-    unsigned LabelID = Move.getLabelID();
-
+    MCSymbol *Label = Move.getLabel();
     // Throw out move if the label is invalid.
-    if (LabelID && MMI->isLabelDeleted(LabelID))
-      continue;
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
 
     const MachineLocation &Dst = Move.getDestination();
     const MachineLocation &Src = Move.getSource();
 
     // Advance row if new location.
-    if (BaseLabel && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
-      EmitCFAByte(dwarf::DW_CFA_advance_loc4);
-      EmitDifference(getDWLabel("label", LabelID),
-                     getDWLabel(BaseLabel, BaseLabelID), true);
-      BaseLabelID = LabelID;
-      BaseLabel = "label";
-      IsLocal = true;
+    if (BaseLabel && Label) {
+      MCSymbol *ThisSym = Label;
+      if (ThisSym != BaseLabel) {
+        EmitCFAByte(dwarf::DW_CFA_advance_loc4);
+        EmitDifference(ThisSym, BaseLabel, true);
+        BaseLabel = ThisSym;
+      }
     }
 
     // If advancing cfa.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 5e2d806..0b94645 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -122,7 +122,7 @@ public:
   
   /// EmitFrameMoves - Emit frame instructions to describe the layout of the
   /// frame.
-  void EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
+  void EmitFrameMoves(MCSymbol *BaseLabel,
                       const std::vector<MachineMove> &Moves, bool isEH);
 };
 
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3531ed6..fa840e1 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -15,8 +15,9 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/Module.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -138,8 +139,7 @@ void OcamlGCMetadataPrinter::finishAssembly(raw_ostream &OS, AsmPrinter &AP,
         llvm_report_error(Msg.str()); // Very rude!
       }
 
-      OS << AddressDirective
-        << MAI.getPrivateGlobalPrefix() << "label" << J->Num << '\n';
+      OS << AddressDirective << J->Label->getName() << '\n';
 
       AP.EmitInt16(FrameSize);
 
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 889763a..13ae43d 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -105,17 +105,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
 
-  // If there are any labels in the basic block, unregister them from
-  // MachineModuleInfo.
-  if (MMI && !MBB->empty()) {
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ++I) {
-      if (I->isLabel())
-        // The label ID # is always operand #0, an immediate.
-        MMI->InvalidateLabel(I->getOperand(0).getImm());
-    }
-  }
-
   // Remove the block.
   MF->erase(MBB);
 }
@@ -984,6 +973,17 @@ static bool IsEmptyBlock(MachineBasicBlock *MBB) {
   return true;
 }
 
+// Blocks with only debug info and branches should be considered the same
+// as blocks with only branches.  Returns false when the block is empty or
+// holds nothing but debug values, so end() is never dereferenced.
+static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+  // Skip leading debug values to find the first real instruction.
+  while (MBBI != MBBE && MBBI->isDebugValue())
+    ++MBBI;
+  return MBBI != MBBE && MBBI->getDesc().isBranch();
+}
+
 /// IsBetterFallthrough - Return true if it would be clearly better to
 /// fall-through to MBB1 than to fall through into MBB2.  This has to return
 /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
@@ -1206,7 +1206,7 @@ ReoptimizeBlock:
     // If this branch is the only thing in its block, see if we can forward
     // other blocks across it.
     if (CurTBB && CurCond.empty() && CurFBB == 0 &&
-        MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
+        IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
         !MBB->hasAddressTaken()) {
       // This block may contain just an unconditional branch.  Because there can
       // be 'non-branch terminators' in the block, try removing the branch and
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index b5e9c84..2ec1f6e 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -57,13 +57,13 @@ namespace llvm {
     bool finishFunction(MachineFunction &F);
 
     /// emitLabel - Emits a label
-    virtual void emitLabel(uint64_t LabelID) {
+    virtual void emitLabel(MCSymbol *Label) {
       assert("emitLabel not implemented");
     }
 
     /// getLabelAddress - Return the address of the specified LabelID, 
     /// only usable after the LabelID has been emitted.
-    virtual uintptr_t getLabelAddress(uint64_t Label) const {
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
       assert("getLabelAddress not implemented");
       return 0;
     }
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 0979c04..eda167c 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -64,7 +64,7 @@ char ELFWriter::ID = 0;
 
 ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
   : MachineFunctionPass(&ID), O(o), TM(tm),
-    OutContext(*new MCContext()),
+    OutContext(*new MCContext(*TM.getMCAsmInfo())),
     TLOF(TM.getTargetLowering()->getObjFileLowering()),
     is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
     isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -109,7 +109,7 @@ bool ELFWriter::doInitialization(Module &M) {
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
   
-  Mang = new Mangler(*MAI);
+  Mang = new Mangler(OutContext, *TM.getTargetData());
 
   // ELF Header
   // ----------
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 055172b..ab0a800 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Pass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -150,30 +151,31 @@ static const char *DescKind(GC::PointKind Kind) {
 }
 
 bool Printer::runOnFunction(Function &F) {
-  if (!F.hasGC()) {
-    GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  if (F.hasGC()) return false;
+  
+  GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  
+  OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                      RE = FD->roots_end(); RI != RE; ++RI)
+    OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+  
+  OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::iterator PI = FD->begin(),
+                                PE = FD->end(); PI != PE; ++PI) {
     
-    OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
-    for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
-                                        RE = FD->roots_end(); RI != RE; ++RI)
-      OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+    OS << "\t" << PI->Label->getName() << ": "
+       << DescKind(PI->Kind) << ", live = {";
     
-    OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
-    for (GCFunctionInfo::iterator PI = FD->begin(),
-                                  PE = FD->end(); PI != PE; ++PI) {
-      
-      OS << "\tlabel " << PI->Num << ": " << DescKind(PI->Kind) << ", live = {";
-      
-      for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
-                                         RE = FD->live_end(PI);;) {
-        OS << " " << RI->Num;
-        if (++RI == RE)
-          break;
-        OS << ",";
-      }
-      
-      OS << " }\n";
+    for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                       RE = FD->live_end(PI);;) {
+      OS << " " << RI->Num;
+      if (++RI == RE)
+        break;
+      OS << ",";
     }
+    
+    OS << " }\n";
   }
   
   return false;
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index 9cd2925..752752f 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCMetadataPrinter.h"
-
 using namespace llvm;
 
 GCMetadataPrinter::GCMetadataPrinter() { }
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index b5006fd..1a23be0 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -71,9 +71,9 @@ namespace {
     
     void FindSafePoints(MachineFunction &MF);
     void VisitCallPoint(MachineBasicBlock::iterator MI);
-    unsigned InsertLabel(MachineBasicBlock &MBB, 
-                         MachineBasicBlock::iterator MI,
-                         DebugLoc DL) const;
+    MCSymbol *InsertLabel(MachineBasicBlock &MBB, 
+                          MachineBasicBlock::iterator MI,
+                          DebugLoc DL) const;
     
     void FindStackOffsets(MachineFunction &MF);
     
@@ -329,14 +329,11 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<GCModuleInfo>();
 }
 
-unsigned MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, 
-                                     MachineBasicBlock::iterator MI,
-                                     DebugLoc DL) const {
-  unsigned Label = MMI->NextLabelID();
-  
-  BuildMI(MBB, MI, DL,
-          TII->get(TargetOpcode::GC_LABEL)).addImm(Label);
-  
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, 
+                                           MachineBasicBlock::iterator MI,
+                                           DebugLoc DL) const {
+  MCSymbol *Label = MBB.getParent()->getContext().GetOrCreateTemporarySymbol();
+  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
   return Label;
 }
 
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 9bc0b71..75e45ef 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -13,16 +13,15 @@
 
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/PassManager.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -94,21 +93,19 @@ static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
     cl::desc("Split GEPs and run no-load GVN"));
 
 LLVMTargetMachine::LLVMTargetMachine(const Target &T,
-                                     const std::string &TargetTriple)
-  : TargetMachine(T) {
+                                     const std::string &Triple)
+  : TargetMachine(T), TargetTriple(Triple) {
   AsmInfo = T.createAsmInfo(TargetTriple);
 }
 
 // Set the default code model for the JIT for a generic target.
 // FIXME: Is small right here? or .is64Bit() ? Large : Small?
-void
-LLVMTargetMachine::setCodeModelForJIT() {
+void LLVMTargetMachine::setCodeModelForJIT() {
   setCodeModel(CodeModel::Small);
 }
 
 // Set the default code model for static compilation for a generic target.
-void
-LLVMTargetMachine::setCodeModelForStatic() {
+void LLVMTargetMachine::setCodeModelForStatic() {
   setCodeModel(CodeModel::Small);
 }
 
@@ -118,20 +115,21 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                             CodeGenOpt::Level OptLevel,
                                             bool DisableVerify) {
   // Add common CodeGen passes.
-  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
+  MCContext *Context = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
     return true;
+  assert(Context != 0 && "Failed to get MCContext");
 
-  OwningPtr<MCContext> Context(new MCContext());
+  const MCAsmInfo &MAI = *getMCAsmInfo();
   OwningPtr<MCStreamer> AsmStreamer;
 
   formatted_raw_ostream *LegacyOutput;
   switch (FileType) {
   default: return true;
   case CGFT_AssemblyFile: {
-    const MCAsmInfo &MAI = *getMCAsmInfo();
     MCInstPrinter *InstPrinter =
       getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, Out);
-    AsmStreamer.reset(createAsmStreamer(*Context, Out, MAI,
+    AsmStreamer.reset(createAsmStreamer(*Context, Out,
                                         getTargetData()->isLittleEndian(),
                                         getVerboseAsm(), InstPrinter,
                                         /*codeemitter*/0));
@@ -143,10 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     // Create the code emitter for the target if it exists.  If not, .o file
     // emission fails.
     MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
-    if (MCE == 0)
+    TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+    if (MCE == 0 || TAB == 0)
       return true;
     
-    AsmStreamer.reset(createMachOStreamer(*Context, Out, MCE));
+    AsmStreamer.reset(createMachOStreamer(*Context, *TAB, Out, MCE));
     
     // Any output to the asmprinter's "O" stream is bad and needs to be fixed,
     // force it to come out stderr.
@@ -167,16 +166,14 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
     break;
   }
   
-  // Create the AsmPrinter, which takes ownership of Context and AsmStreamer
-  // if successful.
+  // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
   FunctionPass *Printer =
-    getTarget().createAsmPrinter(*LegacyOutput, *this, *Context, *AsmStreamer,
-                                 getMCAsmInfo());
+    getTarget().createAsmPrinter(*LegacyOutput, *this, *AsmStreamer);
   if (Printer == 0)
     return true;
   
-  // If successful, createAsmPrinter took ownership of AsmStreamer and Context.
-  Context.take(); AsmStreamer.take();
+  // If successful, createAsmPrinter took ownership of AsmStreamer.
+  AsmStreamer.take();
   
   PM.add(Printer);
   
@@ -200,7 +197,8 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
   setCodeModelForJIT();
   
   // Add common CodeGen passes.
-  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
+  MCContext *Ctx = 0;
+  if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
     return true;
 
   addCodeEmitter(PM, OptLevel, JCE);
@@ -209,8 +207,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
   return false; // success!
 }
 
-static void printNoVerify(PassManagerBase &PM,
-                           const char *Banner) {
+static void printNoVerify(PassManagerBase &PM, const char *Banner) {
   if (PrintMachineCode)
     PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
 }
@@ -230,7 +227,8 @@ static void printAndVerify(PassManagerBase &PM,
 ///
 bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                                                CodeGenOpt::Level OptLevel,
-                                               bool DisableVerify) {
+                                               bool DisableVerify,
+                                               MCContext *&OutContext) {
   // Standard LLVM-Level Passes.
 
   // Before running any passes, run the verifier to determine if the input
@@ -253,8 +251,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   // Turn exception handling constructs into something the code generators can
   // handle.
-  switch (getMCAsmInfo()->getExceptionHandlingType())
-  {
+  switch (getMCAsmInfo()->getExceptionHandlingType()) {
   case ExceptionHandling::SjLj:
     // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
     // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
@@ -294,6 +291,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
     PM.add(createVerifierPass());
 
   // Standard Lower-Level Passes.
+  
+  // Install a MachineModuleInfo class, which is an immutable pass that holds
+  // all the per-module stuff we're generating, including MCContext.
+  MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+  PM.add(MMI);
+  OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
+  
 
   // Set up a MachineFunction for the rest of CodeGen to work on.
   PM.add(new MachineFunctionAnalysis(*this, OptLevel));
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index f8b1707..dbb5e19 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -218,9 +218,9 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
   return false;
 }
 
-/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
-/// it can check use as well.
-bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
+/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
+/// it checks for sub-register reference and it can check use as well.
+bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
                                             unsigned Reg, bool CheckUse,
                                   SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
   for (LiveInterval::Ranges::const_iterator
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 32b1a7d..fc8ae5f 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -41,9 +41,10 @@ MachineBasicBlock::~MachineBasicBlock() {
 
 /// getSymbol - Return the MCSymbol for this basic block.
 ///
-MCSymbol *MachineBasicBlock::getSymbol(MCContext &Ctx) const {
+MCSymbol *MachineBasicBlock::getSymbol() const {
   const MachineFunction *MF = getParent();
-  const char *Prefix = MF->getTarget().getMCAsmInfo()->getPrivateGlobalPrefix();
+  MCContext &Ctx = MF->getContext();
+  const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
   return Ctx.GetOrCreateTemporarySymbol(Twine(Prefix) + "BB" +
                                         Twine(MF->getFunctionNumber()) + "_" +
                                         Twine(getNumber()));
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index ce95d8d..91d3635 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -122,8 +122,12 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
       // Reached end of block, register is obviously dead.
       return true;
 
-    if (I->isDebugValue())
+    if (I->isDebugValue()) {
+      // These must not count against the limit.
+      ++LookAheadLeft;
+      ++I;
       continue;
+    }
     bool SeenDef = false;
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
       const MachineOperand &MO = I->getOperand(i);
@@ -188,7 +192,7 @@ static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
   if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
-      MI->isKill() || MI->isInlineAsm())
+      MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
     return false;
 
   // Ignore copies.
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 1e3cb1e..37f3d22 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -85,8 +85,8 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
 }
 
 MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
-                                 unsigned FunctionNum)
-  : Fn(F), Target(TM) {
+                                 unsigned FunctionNum, MCContext &ctx)
+  : Fn(F), Target(TM), Ctx(ctx) {
   if (TM.getRegisterInfo())
     RegInfo = new (Allocator.Allocate<MachineRegisterInfo>())
                   MachineRegisterInfo(*TM.getRegisterInfo());
@@ -574,6 +574,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
   case MachineJumpTableInfo::EK_LabelDifference32:
   case MachineJumpTableInfo::EK_Custom32:
     return 4;
+  case MachineJumpTableInfo::EK_Inline:
+    return 0;
   }
   assert(0 && "Unknown jump table encoding!");
   return ~0;
@@ -591,6 +593,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
   case MachineJumpTableInfo::EK_LabelDifference32:
   case MachineJumpTableInfo::EK_Custom32:
     return TD.getABIIntegerTypeAlignment(32);
+  case MachineJumpTableInfo::EK_Inline:
+    return 1;
   }
   assert(0 && "Unknown jump table encoding!");
   return ~0;
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 8d87e3e..d3f1d82 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 using namespace llvm;
 
 // Register this pass with PassInfo directly to avoid having to define
@@ -36,7 +37,8 @@ MachineFunctionAnalysis::~MachineFunctionAnalysis() {
 
 bool MachineFunctionAnalysis::runOnFunction(Function &F) {
   assert(!MF && "MachineFunctionAnalysis already initialized!");
-  MF = new MachineFunction(&F, TM, NextFnNum++);
+  MF = new MachineFunction(&F, TM, NextFnNum++,
+                           getAnalysis<MachineModuleInfo>().getContext());
   return false;
 }
 
@@ -47,4 +49,5 @@ void MachineFunctionAnalysis::releaseMemory() {
 
 void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
+  AU.addRequired<MachineModuleInfo>();
 }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 4c7cb8f..40d6b20 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/Metadata.h"
 #include "llvm/Type.h"
 #include "llvm/Value.h"
 #include "llvm/Assembly/Writer.h"
@@ -23,6 +24,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetInstrDesc.h"
@@ -35,7 +37,6 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
-#include "llvm/Metadata.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -189,6 +190,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
            getOffset() == Other.getOffset();
   case MachineOperand::MO_BlockAddress:
     return getBlockAddress() == Other.getBlockAddress();
+  case MachineOperand::MO_MCSymbol:
+    return getMCSymbol() == Other.getMCSymbol();
   }
 }
 
@@ -291,6 +294,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
     OS << '>';
     break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<MCSym=" << *getMCSymbol() << '>';
+    break;
   default:
     llvm_unreachable("Unrecognized operand type");
   }
@@ -1363,30 +1369,33 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
     const MachineOperand &MO = MI->getOperand(i);
     uint64_t Key = (uint64_t)MO.getType() << 32;
     switch (MO.getType()) {
-      default: break;
-      case MachineOperand::MO_Register:
-        if (MO.isDef() && MO.getReg() &&
-            TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-          continue;  // Skip virtual register defs.
-        Key |= MO.getReg();
-        break;
-      case MachineOperand::MO_Immediate:
-        Key |= MO.getImm();
-        break;
-      case MachineOperand::MO_FrameIndex:
-      case MachineOperand::MO_ConstantPoolIndex:
-      case MachineOperand::MO_JumpTableIndex:
-        Key |= MO.getIndex();
-        break;
-      case MachineOperand::MO_MachineBasicBlock:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
-        break;
-      case MachineOperand::MO_GlobalAddress:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
-        break;
-      case MachineOperand::MO_BlockAddress:
-        Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
-        break;
+    default: break;
+    case MachineOperand::MO_Register:
+      if (MO.isDef() && MO.getReg() &&
+          TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue;  // Skip virtual register defs.
+      Key |= MO.getReg();
+      break;
+    case MachineOperand::MO_Immediate:
+      Key |= MO.getImm();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      Key |= MO.getIndex();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+      break;
+    case MachineOperand::MO_MCSymbol:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+      break;
     }
     Key += ~(Key << 32);
     Key ^= (Key >> 22);
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 72fb9fd..af48e9e 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -10,6 +10,11 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 
 #include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -17,11 +22,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
@@ -29,38 +31,263 @@ using namespace llvm::dwarf;
 
 // Handle the Pass registration stuff necessary to use TargetData's.
 static RegisterPass<MachineModuleInfo>
-X("machinemoduleinfo", "Module Information");
+X("machinemoduleinfo", "Machine Module Information");
 char MachineModuleInfo::ID = 0;
 
 // Out of line virtual method.
 MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
 
+namespace llvm {
+/// MMIAddrLabelMapCallbackPtr - A value handle (privately derived from
+/// CallbackVH) that additionally remembers which MMIAddrLabelMap should be
+/// told when the watched value is deleted or has its uses replaced.
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+  MMIAddrLabelMap *Map;  // Map to notify; null until setMap() is called.
+public:
+  MMIAddrLabelMapCallbackPtr() : Map(0) {}
+  // Not explicit, so a Value* converts implicitly.  A handle built this way
+  // starts with a null Map.
+  MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+  
+  /// setMap - Record the map that the callbacks below forward to.
+  void setMap(MMIAddrLabelMap *map) { Map = map; }
+  
+  // Notification hooks; both dereference Map without checking it for null.
+  virtual void deleted();
+  virtual void allUsesReplacedWith(Value *V2);
+};
+  
+/// MMIAddrLabelMap - Tracks the symbols handed out for address-taken basic
+/// blocks, and keeps that bookkeeping up to date when such a block is deleted
+/// or has all of its uses replaced with another block.
+class MMIAddrLabelMap {
+  /// Context - Used to create the temporary symbols handed out for blocks.
+  MCContext &Context;
+  struct AddrLabelSymEntry {
+    /// Symbols - The symbols for the label.  This is a pointer union that is
+    /// either one symbol (the common case) or a list of symbols.
+    PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
+    
+    Function *Fn;   // The containing function of the BasicBlock.
+    unsigned Index; // The index in BBCallbacks for the BasicBlock.
+  };
+  
+  /// AddrLabelSymbols - The entry for each block we have handed a symbol out
+  /// for.  A null 'Symbols' member means the block has no symbol yet.
+  DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+  
+  /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for.  We
+  /// use this so we get notified if a block is deleted or RAUWd.
+  std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+  /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+  /// whose corresponding BasicBlock got deleted.  These symbols need to be
+  /// emitted at some point in the file, so AsmPrinter emits them after the
+  /// function body.
+  DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+    DeletedAddrLabelsNeedingEmission;
+public:
+  
+  MMIAddrLabelMap(MCContext &context) : Context(context) {}
+  ~MMIAddrLabelMap() {
+    // Every queued symbol must have been taken (see
+    // takeDeletedSymbolsForFunction) before the map is torn down.
+    assert(DeletedAddrLabelsNeedingEmission.empty() &&
+           "Some labels for deleted blocks never got emitted");
+    
+    // Deallocate any of the 'list of symbols' case.
+    for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+         I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+      if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+        delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+  }
+  
+  /// getAddrLabelSymbol - Return the (first) symbol for BB, creating one if
+  /// the block does not have any yet.
+  MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+  /// getAddrLabelSymbolToEmit - Return all symbols currently attached to BB.
+  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+  /// takeDeletedSymbolsForFunction - Hand the symbols queued for F over to
+  /// Result and forget them.
+  void takeDeletedSymbolsForFunction(Function *F, 
+                                     std::vector<MCSymbol*> &Result);
+
+  // Invoked from the MMIAddrLabelMapCallbackPtr notification hooks.
+  void UpdateForDeletedBlock(BasicBlock *BB);
+  void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+/// getAddrLabelSymbol - Return the symbol for the address-taken block BB.  The
+/// first request for a block creates a temporary symbol and registers a
+/// callback for it; when several symbols are attached to the block, the first
+/// one in the list is returned.
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Slot = AddrLabelSymbols[BB];
+
+  if (Slot.Symbols.isNull()) {
+    // First request for this block: register a callback so we hear about the
+    // block being deleted or RAUW'd, then create its symbol.
+    BBCallbacks.push_back(BB);
+    BBCallbacks.back().setMap(this);
+    Slot.Index = BBCallbacks.size()-1;
+    Slot.Fn = BB->getParent();
+    MCSymbol *Fresh = Context.CreateTempSymbol();
+    Slot.Symbols = Fresh;
+    return Fresh;
+  }
+
+  // The block is already known; hand back its (first) symbol.
+  assert(BB->getParent() == Slot.Fn && "Parent changed");
+  if (!Slot.Symbols.is<MCSymbol*>())
+    return (*Slot.Symbols.get<std::vector<MCSymbol*>*>())[0];
+  return Slot.Symbols.get<MCSymbol*>();
+}
+
+/// getAddrLabelSymbolToEmit - Return every symbol currently attached to the
+/// address-taken block BB, creating one first if the block has none yet.
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Slot = AddrLabelSymbols[BB];
+
+  // No symbol yet: create one on demand.
+  if (Slot.Symbols.isNull())
+    return std::vector<MCSymbol*>(1, getAddrLabelSymbol(BB));
+
+  // Exactly one symbol attached.
+  if (MCSymbol *Only = Slot.Symbols.dyn_cast<MCSymbol*>())
+    return std::vector<MCSymbol*>(1, Only);
+
+  // Several symbols: return a copy of the whole list.
+  return *Slot.Symbols.get<std::vector<MCSymbol*>*>();
+}
+
+
+/// takeDeletedSymbolsForFunction - Hand the symbols queued for F (labels of
+/// deleted blocks that still need emitting) over to Result and drop F's
+/// record.  Result's previous contents are discarded in that case; when
+/// nothing is queued for F, Result is left untouched.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+  typedef DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+    PendingMapTy;
+  PendingMapTy::iterator Pending = DeletedAddrLabelsNeedingEmission.find(F);
+
+  if (Pending != DeletedAddrLabelsNeedingEmission.end()) {
+    // Exchange contents rather than copy, then forget the function.
+    Result.swap(Pending->second);
+    DeletedAddrLabelsNeedingEmission.erase(Pending);
+  }
+}
+
+
+/// UpdateForDeletedBlock - BB is going away.  Drop its entry and callback, and
+/// queue every symbol of the block that has not been defined yet so that it
+/// can still be emitted with the containing function.
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+  // Pull the block's entry out of the map (by value) and disarm its callback.
+  AddrLabelSymEntry Dead = AddrLabelSymbols[BB];
+  AddrLabelSymbols.erase(BB);
+  assert(!Dead.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+  BBCallbacks[Dead.Index] = 0;  // Clear the callback.
+
+  assert((BB->getParent() == 0 || BB->getParent() == Dead.Fn) &&
+         "Block/parent mismatch");
+
+  // Symbols that are already defined need nothing more.  Anything else is
+  // queued under the function recorded in the entry, because the dying block
+  // may already have been unlinked from its parent.
+  if (MCSymbol *Only = Dead.Symbols.dyn_cast<MCSymbol*>()) {
+    if (!Only->isDefined())
+      DeletedAddrLabelsNeedingEmission[Dead.Fn].push_back(Only);
+    return;
+  }
+
+  std::vector<MCSymbol*> *List = Dead.Symbols.get<std::vector<MCSymbol*>*>();
+  for (std::vector<MCSymbol*>::iterator I = List->begin(), E = List->end();
+       I != E; ++I)
+    if (!(*I)->isDefined())
+      DeletedAddrLabelsNeedingEmission[Dead.Fn].push_back(*I);
+
+  // The entry is gone, so release the symbol list it owned.
+  delete List;
+}
+
+/// UpdateForRAUWBlock - All uses of Old have been replaced with New.  Move the
+/// symbols recorded for Old over to New: if New has no symbols yet it simply
+/// inherits Old's entry (and callback slot); otherwise Old's symbols are
+/// appended to New's symbol list and Old's callback slot is cleared.
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+  // Get the entry for the RAUW'd block and remove it from our map.
+  AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+  AddrLabelSymbols.erase(Old);
+  assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+  AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+  // If New is not address taken, just move our symbol over to it.
+  if (NewEntry.Symbols.isNull()) {
+    // Retarget the callback at New.  Assigning a BasicBlock* goes through a
+    // temporary handle whose Map is null, and copying that temporary over the
+    // slot wipes the slot's Map.  Restore it, otherwise a later delete/RAUW
+    // of New would call through a null Map.
+    BBCallbacks[OldEntry.Index] = New;
+    BBCallbacks[OldEntry.Index].setMap(this);
+    NewEntry = OldEntry;     // Set New's entry.
+    return;
+  }
+
+  // New already has its own callback slot; Old's is no longer needed.
+  BBCallbacks[OldEntry.Index] = 0;
+
+  // Otherwise, we need to add the old symbol to the new block's set.  If it is
+  // just a single entry, upgrade it to a symbol list.
+  if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+    SymList->push_back(PrevSym);
+    NewEntry.Symbols = SymList;
+  }
+      
+  std::vector<MCSymbol*> *SymList =
+    NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+  // If the old entry was a single symbol, add it.
+  if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    SymList->push_back(Sym);
+    return;
+  }
+  
+  // Otherwise, concatenate the list.  Old's list was owned by the entry we
+  // removed above, so free it once its contents have been copied.
+  std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+  SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+  delete Syms;
+}
+
+
+/// deleted - The watched block is being destroyed; forward to the owning map.
+void MMIAddrLabelMapCallbackPtr::deleted() {
+  BasicBlock *DyingBB = cast<BasicBlock>(getValPtr());
+  Map->UpdateForDeletedBlock(DyingBB);
+}
+
+/// allUsesReplacedWith - The watched block has been RAUW'd with V2; forward
+/// both blocks to the owning map.
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+  BasicBlock *OldBB = cast<BasicBlock>(getValPtr());
+  BasicBlock *NewBB = cast<BasicBlock>(V2);
+  Map->UpdateForRAUWBlock(OldBB, NewBB);
+}
+
+
 //===----------------------------------------------------------------------===//
 
-MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(&ID)
-, ObjFileMMI(0)
-, CurCallSite(0)
-, CallsEHReturn(0)
-, CallsUnwindInit(0)
-, DbgInfoAvailable(false) {
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
+: ImmutablePass(&ID), Context(MAI),
+  ObjFileMMI(0),
+  CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
   // Always emit some info, by default "no personality" info.
   Personalities.push_back(NULL);
+  AddrLabelSymbols = 0;
+}
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) {
+  assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
+         "should always be explicitly constructed by LLVMTargetMachine");
+  abort();
 }
 
 MachineModuleInfo::~MachineModuleInfo() {
   delete ObjFileMMI;
+  
+  // FIXME: Why isn't doFinalization being called??
+  //assert(AddrLabelSymbols == 0 && "doFinalization not called");
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
 }
 
 /// doInitialization - Initialize the state for a new module.
 ///
 bool MachineModuleInfo::doInitialization() {
+  assert(AddrLabelSymbols == 0 && "Improperly initialized");
   return false;
 }
 
 /// doFinalization - Tear down the state after completion of a module.
 ///
 bool MachineModuleInfo::doFinalization() {
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
   return false;
 }
 
@@ -99,7 +326,45 @@ void MachineModuleInfo::AnalyzeModule(Module &M) {
       UsedFunctions.insert(F);
 }
 
-//===-EH-------------------------------------------------------------------===//
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol that stands for the address of the
+/// given basic block.  The block's ordinary LBB label cannot serve here,
+/// since the address may be referenced from outside the containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+  // The label map is only allocated once some block's address is requested.
+  if (!AddrLabelSymbols)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+
+  BasicBlock *MutableBB = const_cast<BasicBlock*>(BB);
+  return AddrLabelSymbols->getAddrLabelSymbol(MutableBB);
+}
+
+/// getAddrLabelSymbolToEmit - Return the full set of symbols to emit for the
+/// given address-taken basic block.  Blocks that were RAUW'd to this one
+/// contribute their symbols too, so more than one may come back.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+  // The label map is only allocated once some block's address is requested.
+  if (!AddrLabelSymbols)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+
+  BasicBlock *MutableBB = const_cast<BasicBlock*>(BB);
+  return AddrLabelSymbols->getAddrLabelSymbolToEmit(MutableBB);
+}
+
+
+/// takeDeletedSymbolsForFunction - Fetch the symbols of address-taken blocks
+/// of F that were referenced but then deleted, so the caller can still emit
+/// them.  This keeps us from emitting a reference to a symbol that never gets
+/// a definition.  Result is left alone when there is nothing to hand over.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+                              std::vector<MCSymbol*> &Result) {
+  // Without a label map, no block has had its address taken.
+  if (AddrLabelSymbols != 0)
+    AddrLabelSymbols->
+      takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
 
 /// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
 /// specified MachineBasicBlock.
@@ -119,7 +384,7 @@ LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
 /// addInvoke - Provide the begin and end labels of an invoke style call and
 /// associate it with a try landing pad block.
 void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
-                                  unsigned BeginLabel, unsigned EndLabel) {
+                                  MCSymbol *BeginLabel, MCSymbol *EndLabel) {
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   LP.BeginLabels.push_back(BeginLabel);
   LP.EndLabels.push_back(EndLabel);
@@ -127,8 +392,8 @@ void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
 
 /// addLandingPad - Provide the label of a try LandingPad block.
 ///
-unsigned MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
-  unsigned LandingPadLabel = NextLabelID();
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
   LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
   LP.LandingPadLabel = LandingPadLabel;
   return LandingPadLabel;
@@ -185,7 +450,7 @@ void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
 void MachineModuleInfo::TidyLandingPads() {
   for (unsigned i = 0; i != LandingPads.size(); ) {
     LandingPadInfo &LandingPad = LandingPads[i];
-    if (isLabelDeleted(LandingPad.LandingPadLabel))
+    if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined())
       LandingPad.LandingPadLabel = 0;
 
     // Special case: we *should* emit LPs with null LP MBB. This indicates
@@ -195,16 +460,14 @@ void MachineModuleInfo::TidyLandingPads() {
       continue;
     }
 
-    for (unsigned j=0; j != LandingPads[i].BeginLabels.size(); ) {
-      unsigned BeginLabel = LandingPad.BeginLabels[j];
-      unsigned EndLabel = LandingPad.EndLabels[j];
-      if (isLabelDeleted(BeginLabel) || isLabelDeleted(EndLabel)) {
-        LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
-        LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
-        continue;
-      }
-
-      ++j;
+    for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+      MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+      MCSymbol *EndLabel = LandingPad.EndLabels[j];
+      if (BeginLabel->isDefined() && EndLabel->isDefined()) continue;
+      
+      LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+      LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+      --j, --e;
     }
 
     // Remove landing pads with no try-ranges.
@@ -218,7 +481,6 @@ void MachineModuleInfo::TidyLandingPads() {
     if (!LandingPad.LandingPadBlock ||
         (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
       LandingPad.TypeIds.clear();
-
     ++i;
   }
 }
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 39d2c75..5ab56c0 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -25,10 +25,9 @@ void MachineModuleInfoMachO::Anchor() {}
 void MachineModuleInfoELF::Anchor() {}
 
 static int SortSymbolPair(const void *LHS, const void *RHS) {
-  const MCSymbol *LHSS =
-    ((const std::pair<MCSymbol*, MCSymbol*>*)LHS)->first;
-  const MCSymbol *RHSS =
-    ((const std::pair<MCSymbol*, MCSymbol*>*)RHS)->first;
+  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
   return LHSS->getName().compare(RHSS->getName());
 }
 
@@ -36,7 +35,7 @@ static int SortSymbolPair(const void *LHS, const void *RHS) {
 /// sorted orer.
 MachineModuleInfoImpl::SymbolListTy
 MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
-                                                     MCSymbol*> &Map) {
+                                      MachineModuleInfoImpl::StubValueTy>&Map) {
   MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
 
   if (!List.empty())
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 04303cf..194fc14 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -671,8 +671,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
   
   // Live-out (of the function) registers contain return values of the function,
   // so we need to make sure they are alive at return time.
-  if (!MBB.empty() && MBB.back().getDesc().isReturn()) {
-    MachineInstr* Ret = &MBB.back();
+  MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
+  bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());
+
+  if (BBEndsInReturn)
     for (MachineRegisterInfo::liveout_iterator
          I = MF->getRegInfo().liveout_begin(),
          E = MF->getRegInfo().liveout_end(); I != E; ++I)
@@ -680,7 +682,6 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
         Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
         LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
       }
-  }
   
   // Finally, loop over the final use/def of each reg 
   // in the block and determine if it is dead.
@@ -696,7 +697,10 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
     bool usedOutsideBlock = isPhysReg ? false :   
           UsedInMultipleBlocks.test(MO.getReg() -  
                                     TargetRegisterInfo::FirstVirtualRegister);
-    if (!isPhysReg && !usedOutsideBlock) {
+
+    // If the machine BB ends in a return instruction, then the value isn't used
+    // outside of the BB.
+    if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
       // DBG_VALUE complicates this:  if the only refs of a register outside
       // this block are DBG_VALUE, we can't keep the reg live just for that,
       // as it will cause the reg to be spilled at the end of this block when
@@ -704,7 +708,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
       // happens.
       bool UsedByDebugValueOnly = false;
       for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
-           UE = MRI.reg_end(); UI != UE; ++UI)
+             UE = MRI.reg_end(); UI != UE; ++UI) {
         // Two cases:
         // - used in another block
         // - used in the same block before it is defined (loop)
@@ -714,6 +718,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
             UsedByDebugValueOnly = true;
             continue;
           }
+
           // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
           UsedInMultipleBlocks.set(MO.getReg() - 
                                    TargetRegisterInfo::FirstVirtualRegister);
@@ -721,6 +726,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
           UsedByDebugValueOnly = false;
           break;
         }
+      }
+
       if (UsedByDebugValueOnly)
         for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
              UE = MRI.reg_end(); UI != UE; ++UI)
@@ -730,16 +737,16 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
             UI.getOperand().setReg(0U);
     }
   
-    // Physical registers and those that are not live-out of the block
-    // are killed/dead at their last use/def within this block.
-    if (isPhysReg || !usedOutsideBlock) {
+    // Physical registers and those that are not live-out of the block are
+    // killed/dead at their last use/def within this block.
+    if (isPhysReg || !usedOutsideBlock || BBEndsInReturn)
       if (MO.isUse()) {
         // Don't mark uses that are tied to defs as kills.
         if (!MI->isRegTiedToDefOperand(idx))
           MO.setIsKill(true);
-      } else
+      } else {
         MO.setIsDead(true);
-    }
+      }
   }
 }
 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index badf34e..e532ade 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -34,6 +34,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineDominatorTree &mdt)
   : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
   MFI = mf.getFrameInfo();
+  DbgValueVec.clear();
 }
 
 /// Run - perform scheduling.
@@ -157,6 +158,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
   std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
 
+  // Keep track of dangling debug references to registers.
+  std::pair<MachineInstr*, unsigned>
+        DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister];
+
   // Check to see if the scheduler cares about latencies.
   bool UnitLatencies = ForceUnitLatencies();
 
@@ -164,10 +169,25 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
   unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
 
+  // Remove any stale debug info; sometimes BuildSchedGraph is called again
+  // without emitting the info from the previous call.
+  DbgValueVec.clear();
+  std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue));
+
   // Walk the list of instructions, from bottom moving up.
   for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
+    // DBG_VALUE does not have SUnit's built, so just remember these for later
+    // reinsertion.
+    if (MI->isDebugValue()) {
+      if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
+          MI->getOperand(0).getReg())
+        DanglingDebugValue[MI->getOperand(0).getReg()] =
+             std::make_pair(MI, DbgValueVec.size());
+      DbgValueVec.push_back(MI);
+      continue;
+    }
     const TargetInstrDesc &TID = MI->getDesc();
     assert(!TID.isTerminator() && !MI->isLabel() &&
            "Cannot schedule terminators or labels!");
@@ -188,6 +208,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
       if (Reg == 0) continue;
 
       assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+
+      if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
+        SU->setDbgInstr(DanglingDebugValue[Reg].first);
+        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
+        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
+      }
+
       std::vector<SUnit *> &UseList = Uses[Reg];
       std::vector<SUnit *> &DefList = Defs[Reg];
       // Optionally add output and anti dependencies. For anti
@@ -555,6 +582,14 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
     BB->remove(I);
   }
 
+  // First reinsert any remaining debug_values; these are either constants,
+  // or refer to live-in registers.  The beginning of the block is the right
+  // place for the latter.  The former might reasonably be placed elsewhere
+  // using some kind of ordering algorithm, but right now it doesn't matter.
+  for (int i = DbgValueVec.size()-1; i>=0; --i)
+    if (DbgValueVec[i])
+      BB->insert(InsertPos, DbgValueVec[i]);
+
   // Then re-insert them according to the given schedule.
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     SUnit *SU = Sequence[i];
@@ -565,12 +600,21 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
     }
 
     BB->insert(InsertPos, SU->getInstr());
+    if (SU->getDbgInstr())
+      BB->insert(InsertPos, SU->getDbgInstr());
   }
 
   // Update the Begin iterator, as the first instruction in the block
   // may have been scheduled later.
-  if (!Sequence.empty())
+  // The leftover DBG_VALUEs were reinserted ahead of the scheduled
+  // instructions, walking DbgValueVec from its last entry down, so the
+  // surviving entry with the highest index is the one that now comes first.
+  // Entries that were attached to an SUnit have been nulled out and must be
+  // skipped; if none survive, fall back to the first scheduled instruction.
+  MachineInstr *FirstDbgValue = 0;
+  for (int i = DbgValueVec.size()-1; i>=0; --i)
+    if (DbgValueVec[i]!=0) {
+      FirstDbgValue = DbgValueVec[i];
+      break;
+    }
+  if (FirstDbgValue)
+    Begin = FirstDbgValue;
+  else if (!Sequence.empty())
     Begin = Sequence[0]->getInstr();
 
+  DbgValueVec.clear();
   return BB;
 }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index 366c3a8..c9b44de 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -106,6 +106,10 @@ namespace llvm {
     /// initialized and destructed for each block.
     std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
     std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+ 
+    /// DbgValueVec - Remember DBG_VALUEs that refer to a particular
+    /// register.
+    std::vector<MachineInstr *>DbgValueVec;
 
     /// PendingLoads - Remember where unknown loads are after the most recent
     /// unknown store, as we iterate. As with Defs and Uses, this is here
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3be6b43..aa283ad 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4605,7 +4605,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
 
   SDNode *Trunc = 0;
   if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
-    // Look pass truncate.
+    // Look past truncate.
     Trunc = N1.getNode();
     N1 = N1.getOperand(0);
   }
@@ -4700,7 +4700,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
           Equal = true;
         }
 
-      EVT SetCCVT = N1.getValueType();
+      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
+      
+      EVT SetCCVT = NodeToReplace.getValueType();
       if (LegalTypes)
         SetCCVT = TLI.getSetCCResultType(SetCCVT);
       SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -4709,9 +4711,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                                    Equal ? ISD::SETEQ : ISD::SETNE);
       // Replace the uses of XOR with SETCC
       WorkListRemover DeadNodes(*this);
-      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
-      removeFromWorkList(N1.getNode());
-      DAG.DeleteNode(N1.getNode());
+      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
+      removeFromWorkList(NodeToReplace.getNode());
+      DAG.DeleteNode(NodeToReplace.getNode());
       return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                          MVT::Other, Chain, SetCC, N2);
     }
@@ -5409,7 +5411,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     if (SimplifyDemandedBits(Value,
                              APInt::getLowBitsSet(
                                Value.getValueType().getScalarType().getSizeInBits(),
-                               ST->getMemoryVT().getSizeInBits())))
+                               ST->getMemoryVT().getScalarType().getSizeInBits())))
       return SDValue(N, 0);
   }
 
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 1d76c7c..3fc30ff 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -53,7 +53,6 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "SelectionDAGBuilder.h"
 #include "FunctionLoweringInfo.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 625de11..fda094d3 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -15,7 +15,7 @@
 
 #define DEBUG_TYPE "instr-emitter"
 #include "InstrEmitter.h"
-#include "SDDbgValue.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -508,6 +508,7 @@ InstrEmitter::EmitDbgValue(SDNode *Node,
     return;
   if (!sd)
     return;
+  assert(sd->getKind() == SDDbgValue::SDNODE);
   unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap);
   const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
   DebugLoc DL = sd->getDebugLoc();
@@ -524,26 +525,46 @@ InstrEmitter::EmitDbgValue(SDNode *Node,
   MBB->insert(InsertPos, MI);
 }
 
-/// EmitDbgValue - Generate constant debug info.  No SDNode is involved.
+/// EmitDbgValue - Generate debug info that does not refer to a SDNode.
 void
-InstrEmitter::EmitDbgValue(SDDbgValue *sd) {
+InstrEmitter::EmitDbgValue(SDDbgValue *sd,
+                         DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
   if (!sd)
     return;
   const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+  uint64_t Offset = sd->getOffset();
+  MDNode* mdPtr = sd->getMDPtr();
+  SDDbgValue::DbgValueKind kind = sd->getKind();
   DebugLoc DL = sd->getDebugLoc();
-  MachineInstr *MI;
-  Value *V = sd->getConst();
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
-    MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
-                                   addImm(sd->getOffset()).
-                                   addMetadata(sd->getMDPtr());
-  } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
-    MI = BuildMI(*MF, DL, II).addFPImm(CF).addImm(sd->getOffset()).
-                                   addMetadata(sd->getMDPtr());
+  MachineInstr* MI;
+  if (kind == SDDbgValue::CONST) {
+    Value *V = sd->getConst();
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
+                                     addImm(Offset).addMetadata(mdPtr);
+    } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+      MI = BuildMI(*MF, DL, II).addFPImm(CF).
+                                     addImm(Offset).addMetadata(mdPtr);
+    } else {
+      // Could be an Undef.  In any case insert an Undef so we can see what we
+      // dropped.
+      MI = BuildMI(*MF, DL, II).addReg(0U).
+                                       addImm(Offset).addMetadata(mdPtr);
+    }
+  } else if (kind == SDDbgValue::FRAMEIX) {
+    unsigned FrameIx = sd->getFrameIx();
+    // Stack address; this needs to be lowered in target-dependent fashion.
+    // FIXME test that the target supports this somehow; if not emit Undef.
+    // Create a pseudo for EmitInstrWithCustomInserter's consumption.
+    MI = BuildMI(*MF, DL, II).addImm(FrameIx).
+                                   addImm(Offset).addMetadata(mdPtr);
+    MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+    InsertPos = MBB->end();
+    return;
   } else {
     // Insert an Undef so we can see what we dropped.
-    MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
-                                    addMetadata(sd->getMDPtr());
+    MI = BuildMI(*MF, DL, II).addReg(0U).
+                                     addImm(Offset).addMetadata(mdPtr);
   }
   MBB->insert(InsertPos, MI);
 }
@@ -684,6 +705,13 @@ void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
     EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
     break;
   }
+  case ISD::EH_LABEL: {
+    MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+            TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+    break;
+  }
+      
   case ISD::INLINEASM: {
     unsigned NumOps = Node->getNumOperands();
     if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 4fe9f19..eefcd73 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -106,7 +106,8 @@ public:
 
 
   /// EmitDbgValue - Generate a constant DBG_VALUE.  No node is involved.
-  void EmitDbgValue(SDDbgValue* sd);
+  void EmitDbgValue(SDDbgValue* sd,
+                DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
 
   /// EmitNode - Generate machine code for a node and needed dependencies.
   ///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f498263..b5af2c1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -851,6 +851,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   case ISD::MERGE_VALUES:
   case ISD::EH_RETURN:
   case ISD::FRAME_TO_ARGS_OFFSET:
+  case ISD::FP16_TO_FP32:
+  case ISD::FP32_TO_FP16:
     // These operations lie about being legal: when they claim to be legal,
     // they should actually be expanded.
     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1585,35 +1587,51 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
   DebugLoc dl = Node->getDebugLoc();
   SDValue Tmp1 = Node->getOperand(0);
   SDValue Tmp2 = Node->getOperand(1);
-  assert((Tmp2.getValueType() == MVT::f32 ||
-          Tmp2.getValueType() == MVT::f64) &&
-          "Ugly special-cased code!");
-  // Get the sign bit of the RHS.
+
+  // Get the sign bit of the RHS.  First obtain a value that has the same
+  // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
   SDValue SignBit;
-  EVT IVT = Tmp2.getValueType() == MVT::f64 ? MVT::i64 : MVT::i32;
+  EVT FloatVT = Tmp2.getValueType();
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
   if (isTypeLegal(IVT)) {
+    // Convert to an integer with the same sign bit.
     SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
   } else {
-    assert(isTypeLegal(TLI.getPointerTy()) &&
-            (TLI.getPointerTy() == MVT::i32 || 
-            TLI.getPointerTy() == MVT::i64) &&
-            "Legal type for load?!");
-    SDValue StackPtr = DAG.CreateStackTemporary(Tmp2.getValueType());
-    SDValue StorePtr = StackPtr, LoadPtr = StackPtr;
+    // Store the float to memory, then load the sign part out as an integer.
+    MVT LoadTy = TLI.getPointerTy();
+    // First create a temporary that is aligned for both the load and store.
+    SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+    // Then store the float to it.
     SDValue Ch =
-      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StorePtr, NULL, 0,
+      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
                    false, false, 0);
-    if (Tmp2.getValueType() == MVT::f64 && TLI.isLittleEndian())
-      LoadPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(),
-                            LoadPtr, DAG.getIntPtrConstant(4));
-    SignBit = DAG.getExtLoad(ISD::SEXTLOAD, dl, TLI.getPointerTy(),
-                             Ch, LoadPtr, NULL, 0, MVT::i32,
-                             false, false, 0);
-  }
-  SignBit =
-      DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
-                    SignBit, DAG.getConstant(0, SignBit.getValueType()),
-                    ISD::SETLT);
+    if (TLI.isBigEndian()) {
+      assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+      // Load out a legal integer with the same sign bit as the float.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+    } else { // Little endian
+      SDValue LoadPtr = StackPtr;
+      // The float may be wider than the integer we are going to load.  Advance
+      // the pointer so that the loaded integer will contain the sign bit.
+      unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+      unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+                            LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+      // Load a legal integer containing the sign bit.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+      // Move the sign bit to the top bit of the loaded integer.
+      unsigned BitShift = LoadTy.getSizeInBits() -
+        (FloatVT.getSizeInBits() - 8 * ByteOffset);
+      assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+      if (BitShift)
+        SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+                              DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+    }
+  }
+  // Now get the sign bit proper, by seeing whether the value is negative.
+  SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+                         SignBit, DAG.getConstant(0, SignBit.getValueType()),
+                         ISD::SETLT);
   // Get the absolute value of the result.
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
   // Select between the nabs and abs value based on the sign bit of
@@ -2620,6 +2638,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                                       RTLIB::REM_F80, RTLIB::REM_PPCF128));
     break;
+  case ISD::FP16_TO_FP32:
+    Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+    break;
+  case ISD::FP32_TO_FP16:
+    Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false));
+    break;
   case ISD::ConstantFP: {
     ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
     // Check to see if this FP immediate is already legal.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 35a7c7c..665b21f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -79,6 +79,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FNEG:        R = SoftenFloatRes_FNEG(N); break;
     case ISD::FP_EXTEND:   R = SoftenFloatRes_FP_EXTEND(N); break;
     case ISD::FP_ROUND:    R = SoftenFloatRes_FP_ROUND(N); break;
+    case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break;
     case ISD::FPOW:        R = SoftenFloatRes_FPOW(N); break;
     case ISD::FPOWI:       R = SoftenFloatRes_FPOWI(N); break;
     case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
@@ -332,6 +333,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
   return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc());
 }
 
+// Soften the result of FP16_TO_FP32 by emitting the FPEXT_F16_F32 libcall.
+// FIXME: Should we use 'normal' FP_EXTEND / FP_ROUND instead of special nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Op = N->getOperand(0);  // Operand is passed to the call as-is.
+  return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, N->getDebugLoc());
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Op = N->getOperand(0);
@@ -548,6 +557,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
   case ISD::FP_ROUND:    Res = SoftenFloatOp_FP_ROUND(N); break;
   case ISD::FP_TO_SINT:  Res = SoftenFloatOp_FP_TO_SINT(N); break;
   case ISD::FP_TO_UINT:  Res = SoftenFloatOp_FP_TO_UINT(N); break;
+  case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break;
   case ISD::SELECT_CC:   Res = SoftenFloatOp_SELECT_CC(N); break;
   case ISD::SETCC:       Res = SoftenFloatOp_SETCC(N); break;
   case ISD::STORE:       Res = SoftenFloatOp_STORE(N, OpNo); break;
@@ -704,6 +714,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
   return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+  EVT RVT = N->getValueType(0);  // The call returns the node's result type.
+  RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));  // Softened source operand.
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
   SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
   ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -800,6 +817,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
   case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
   case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOPYSIGN:  ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
   case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
   case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
   case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
@@ -873,6 +891,17 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N,
   GetPairElements(Call, Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),  // Pick by type.
+                                         RTLIB::COPYSIGN_F32,
+                                         RTLIB::COPYSIGN_F64,
+                                         RTLIB::COPYSIGN_F80,
+                                         RTLIB::COPYSIGN_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);  // Hand the call result back through Lo/Hi.
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
                                            SDValue &Lo, SDValue &Hi) {
   SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 81f28ad..48f64c3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -80,6 +80,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
 
+  case ISD::FP32_TO_FP16:Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
   case ISD::AND:
   case ISD::OR:
   case ISD::XOR:
@@ -324,6 +326,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
                      NVT, Res, DAG.getValueType(N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  // Rebuild the same operation, but producing the transformed type NVT.
+  SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+  // Wrap the result in AssertZext tagged with the original result type.
+  return DAG.getNode(ISD::AssertZext, dl,
+                     NVT, Res, DAG.getValueType(N->getValueType(0)));
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   DebugLoc dl = N->getDebugLoc();
@@ -634,6 +646,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
                                                    OpNo); break;
   case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
+  case ISD::FP16_TO_FP32:
   case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
   case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b0af357..9dd9796 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -257,6 +257,7 @@ private:
   SDValue PromoteIntRes_CTTZ(SDNode *N);
   SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_FP_TO_XINT(SDNode *N);
+  SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N);
   SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
   SDValue PromoteIntRes_LOAD(LoadSDNode *N);
   SDValue PromoteIntRes_Overflow(SDNode *N);
@@ -406,6 +407,7 @@ private:
   SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
   SDValue SoftenFloatRes_FNEG(SDNode *N);
   SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
+  SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N);
   SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
   SDValue SoftenFloatRes_FPOW(SDNode *N);
   SDValue SoftenFloatRes_FPOWI(SDNode *N);
@@ -429,6 +431,7 @@ private:
   SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
   SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N);
+  SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N);
   SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
   SDValue SoftenFloatOp_SETCC(SDNode *N);
   SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -455,6 +458,7 @@ private:
   void ExpandFloatRes_FABS      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FADD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCEIL     (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCOS      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FDIV      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FEXP      (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/SDDbgValue.h b/lib/CodeGen/SelectionDAG/SDDbgValue.h
deleted file mode 100644
index d43a044..0000000
--- a/lib/CodeGen/SelectionDAG/SDDbgValue.h
+++ /dev/null
@@ -1,73 +0,0 @@
-//===-- llvm/CodeGen/SDDbgValue.h - SD dbg_value handling--------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SDDbgValue class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SDDBGVALUE_H
-#define LLVM_CODEGEN_SDDBGVALUE_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DebugLoc.h"
-
-namespace llvm {
-
-class MDNode;
-class SDNode;
-class Value;
-
-/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
-/// Either Const or Node is nonzero, but not both.
-/// We do not use SDValue here to avoid including its header.
-
-class SDDbgValue {
-  SDNode *Node;           // valid for non-constants
-  unsigned ResNo;         // valid for non-constants
-  Value *Const;           // valid for constants
-  MDNode *mdPtr;
-  uint64_t Offset;
-  DebugLoc DL;
-  unsigned Order;
-public:
-  // Constructor for non-constants.
-  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
-             unsigned O) :
-    Node(N), ResNo(R), Const(0), mdPtr(mdP), Offset(off), DL(dl), Order(O) {}
-
-  // Constructor for constants.
-  SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) : 
-    Node(0), ResNo(0), Const(C), mdPtr(mdP), Offset(off), DL(dl), Order(O) {}
-
-  // Returns the MDNode pointer.
-  MDNode *getMDPtr() { return mdPtr; }
-
-  // Returns the SDNode* (valid for non-constants only).
-  SDNode *getSDNode() { assert (!Const); return Node; }
-
-  // Returns the ResNo (valid for non-constants only).
-  unsigned getResNo() { assert (!Const); return ResNo; }
-
-  // Returns the Value* for a constant (invalid for non-constants).
-  Value *getConst() { assert (!Node); return Const; }
-
-  // Returns the offset.
-  uint64_t getOffset() { return Offset; }
-
-  // Returns the DebugLoc.
-  DebugLoc getDebugLoc() { return DL; }
-
-  // Returns the SDNodeOrder.  This is the order of the preceding node in the
-  // input.
-  unsigned getOrder() { return Order; }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
new file mode 100644
index 0000000..dbbd753
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -0,0 +1,104 @@
+//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDDbgValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H
+#define LLVM_CODEGEN_SDNODEDBGVALUE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MDNode;
+class SDNode;
+class Value;
+
+/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// We do not use SDValue here to avoid including its header.  The kind tag
+/// says which union member is live; the typed accessors assert on a mismatch.
+class SDDbgValue {
+public:
+  enum DbgValueKind {
+    SDNODE = 0,             // value is the result of an expression
+    CONST = 1,              // value is a constant
+    FRAMEIX = 2             // value is contents of a stack location
+  };
+private:
+  DbgValueKind kind;        // selects the live member of u
+  union {
+    struct {
+      SDNode *Node;         // valid for expressions
+      unsigned ResNo;       // valid for expressions
+    } s;
+    Value *Const;           // valid for constants
+    unsigned FrameIx;       // valid for stack objects
+  } u;
+  MDNode *mdPtr;
+  uint64_t Offset;
+  DebugLoc DL;
+  unsigned Order;
+public:
+  // Constructor for expressions: result number R of SDNode N.
+  SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
+             unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+    kind = SDNODE;
+    u.s.Node = N;
+    u.s.ResNo = R;
+  }
+
+  // Constructor for constants.
+  SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) :
+    mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+    kind = CONST;
+    u.Const = C;
+  }
+
+  // Constructor for frame indices.
+  SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
+    mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+    kind = FRAMEIX;
+    u.FrameIx = FI;
+  }
+
+  // Returns the kind.
+  DbgValueKind getKind() const { return kind; }
+
+  // Returns the MDNode pointer.
+  MDNode *getMDPtr() const { return mdPtr; }
+
+  // Returns the SDNode* for a register ref (kind must be SDNODE).
+  SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
+
+  // Returns the ResNo for a register ref (kind must be SDNODE).
+  unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
+
+  // Returns the Value* for a constant (kind must be CONST).
+  Value *getConst() const { assert (kind==CONST); return u.Const; }
+
+  // Returns the FrameIx for a stack object (kind must be FRAMEIX).
+  unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
+
+  // Returns the offset.
+  uint64_t getOffset() const { return Offset; }
+
+  // Returns the DebugLoc.
+  DebugLoc getDebugLoc() const { return DL; }
+
+  // Returns the SDNodeOrder.  This is the order of the preceding node in the
+  // input.
+  unsigned getOrder() const { return Order; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 06e7b8c..c13565a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
+#include "SDNodeDbgValue.h"
 #include "ScheduleDAGSDNodes.h"
 #include "InstrEmitter.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -412,6 +413,14 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
   InstrEmitter Emitter(BB, InsertPos);
   DenseMap<SDValue, unsigned> VRBaseMap;
   DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+
+  // For now, any constant debug info nodes go at the beginning.
+  for (SDDbgInfo::ConstDbgIterator I = DAG->DbgConstBegin(),
+       E = DAG->DbgConstEnd(); I!=E; I++) {
+    Emitter.EmitDbgValue(*I, EM);
+    delete *I;
+  }
+
   for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
     SUnit *SU = Sequence[i];
     if (!SU) {
@@ -435,10 +444,20 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
     while (!FlaggedNodes.empty()) {
       Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
                        VRBaseMap, EM);
+      if (FlaggedNodes.back()->getHasDebugValue())
+        if (SDDbgValue *sd = DAG->GetDbgInfo(FlaggedNodes.back())) {
+          Emitter.EmitDbgValue(FlaggedNodes.back(), VRBaseMap, sd);
+          delete sd;
+        }
       FlaggedNodes.pop_back();
     }
     Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
                      VRBaseMap, EM);
+    if (SU->getNode()->getHasDebugValue())
+      if (SDDbgValue *sd = DAG->GetDbgInfo(SU->getNode())) {
+        Emitter.EmitDbgValue(SU->getNode(), VRBaseMap, sd);
+        delete sd;
+      }
   }
 
   BB = Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 746d4e2..480c068 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -13,6 +13,7 @@
 
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "SDNodeOrdering.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/Constants.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Function.h"
@@ -596,6 +597,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
 
   // Remove the ordering of this node.
   Ordering->remove(N);
+
+  // And its entry in the debug info table, if any.
+  DbgInfo->remove(N);
 }
 
 /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -793,6 +797,7 @@ SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
     Root(getEntryNode()), Ordering(0) {
   AllNodes.push_back(&EntryNode);
   Ordering = new SDNodeOrdering();
+  DbgInfo = new SDDbgInfo();
 }
 
 void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
@@ -806,6 +811,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
 SelectionDAG::~SelectionDAG() {
   allnodes_clear();
   delete Ordering;
+  delete DbgInfo;
 }
 
 void SelectionDAG::allnodes_clear() {
@@ -833,6 +839,8 @@ void SelectionDAG::clear() {
   Root = getEntryNode();
   delete Ordering;
   Ordering = new SDNodeOrdering();
+  delete DbgInfo;
+  DbgInfo = new SDDbgInfo();
 }
 
 SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -1306,24 +1314,23 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
-                               SDValue Root,
-                               unsigned LabelID) {
+SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
   FoldingSetNodeID ID;
   SDValue Ops[] = { Root };
-  AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
-  ID.AddInteger(LabelID);
+  AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+  ID.AddPointer(Label);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
-
-  SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
-  new (N) LabelSDNode(Opcode, dl, Root, LabelID);
+  
+  SDNode *N = NodeAllocator.Allocate<EHLabelSDNode>();
+  new (N) EHLabelSDNode(dl, Root, Label);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
+
 SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
                                       bool isTarget,
                                       unsigned char TargetFlags) {
@@ -2322,22 +2329,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
   // Constant fold unary operations with an integer constant operand.
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
     const APInt &Val = C->getAPIntValue();
-    unsigned BitWidth = VT.getSizeInBits();
     switch (Opcode) {
     default: break;
     case ISD::SIGN_EXTEND:
-      return getConstant(APInt(Val).sextOrTrunc(BitWidth), VT);
+      return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::ANY_EXTEND:
     case ISD::ZERO_EXTEND:
     case ISD::TRUNCATE:
-      return getConstant(APInt(Val).zextOrTrunc(BitWidth), VT);
+      return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
     case ISD::UINT_TO_FP:
     case ISD::SINT_TO_FP: {
       const uint64_t zero[] = {0, 0};
-      // No compile time operations on this type.
-      if (VT==MVT::ppcf128)
-        break;
-      APFloat apf = APFloat(APInt(BitWidth, 2, zero));
+      // No compile time operations on ppcf128.
+      if (VT == MVT::ppcf128) break;
+      APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
       (void)apf.convertFromAPInt(Val,
                                  Opcode==ISD::SINT_TO_FP,
                                  APFloat::rmNearestTiesToEven);
@@ -5264,6 +5269,25 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
   return Ordering->getOrder(SD);
 }
 
+/// AssignDbgInfo - Attach debug info db to SD and flag SD as having a value.
+void SelectionDAG::AssignDbgInfo(SDNode* SD, SDDbgValue* db) {
+  assert(SD && "Trying to assign dbg info to a null node!");
+  DbgInfo->add(SD, db);        // Record db against this node.
+  SD->setHasDebugValue(true);  // GetDbgInfo only looks up nodes with this set.
+}
+
+/// RememberDbgInfo - Remember debug info which is not assigned to an SDNode.
+void SelectionDAG::RememberDbgInfo(SDDbgValue* db) {
+  DbgInfo->add(db);  // Node-less entry; the builder passes constants/undef.
+}
+
+/// GetDbgInfo - Return SD's debug info, or null if SD has no debug value.
+SDDbgValue* SelectionDAG::GetDbgInfo(const SDNode *SD) {
+  assert(SD && "Trying to get the debug info of a null node!");
+  if (SD->getHasDebugValue())  // Skip the table lookup for untagged nodes.
+    return DbgInfo->getSDDbgValue(SD);
+  return 0;
+}
 
 //===----------------------------------------------------------------------===//
 //                              SDNode Class
@@ -5639,6 +5663,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::FP_TO_SINT:  return "fp_to_sint";
   case ISD::FP_TO_UINT:  return "fp_to_uint";
   case ISD::BIT_CONVERT: return "bit_convert";
+  case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+  case ISD::FP32_TO_FP16: return "fp32_to_fp16";
 
   case ISD::CONVERT_RNDSAT: {
     switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
@@ -5911,7 +5937,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   if (G)
     if (unsigned Order = G->GetOrdering(this))
       OS << " [ORD=" << Order << ']';
-  
+
   if (getNodeId() != -1)
     OS << " [ID=" << getNodeId() << ']';
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 308742b..3d9a4d5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "isel"
+#include "SDNodeDbgValue.h"
 #include "SelectionDAGBuilder.h"
 #include "FunctionLoweringInfo.h"
 #include "llvm/ADT/BitVector.h"
@@ -2185,7 +2186,8 @@ void SelectionDAGBuilder::visitSelect(User &I) {
 
   for (unsigned i = 0; i != NumValues; ++i)
     Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                            TrueVal.getNode()->getValueType(i), Cond,
+                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+                            Cond,
                             SDValue(TrueVal.getNode(),
                                     TrueVal.getResNo() + i),
                             SDValue(FalseVal.getNode(),
@@ -3799,11 +3801,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
     return 0;
   }
   case Intrinsic::dbg_value: {
-    // FIXME: currently, we get here only if OptLevel != CodeGenOpt::None.
-    // The real handling of this intrinsic is in FastISel.
-    if (OptLevel != CodeGenOpt::None)
-      // FIXME: Variable debug info is not supported here.
-      return 0;
     DwarfWriter *DW = DAG.getDwarfWriter();
     if (!DW)
       return 0;
@@ -3812,9 +3809,36 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
       return 0;
 
     MDNode *Variable = DI.getVariable();
+    uint64_t Offset = DI.getOffset();
     Value *V = DI.getValue();
     if (!V)
       return 0;
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
+    // but do not always have a corresponding SDNode built.  The SDNodeOrder
+    // absolute, but not relative, values are different depending on whether
+    // debug info exists.
+    ++SDNodeOrder;
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+      SDDbgValue* dv = new SDDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+      DAG.RememberDbgInfo(dv);
+    } else {
+      SDValue &N = NodeMap[V];
+      if (N.getNode()) {
+        SDDbgValue *dv = new SDDbgValue(Variable, N.getNode(),
+                                        N.getResNo(), Offset, dl, SDNodeOrder);
+        DAG.AssignDbgInfo(N.getNode(), dv);
+      } else {
+        // We may expand this to cover more cases.  One case where we have no
+        // data available is an unreferenced parameter; we need this fallback.
+        SDDbgValue* dv = new SDDbgValue(Variable, 
+                                        UndefValue::get(V->getType()),
+                                        Offset, dl, SDNodeOrder);
+        DAG.RememberDbgInfo(dv);
+      }
+    }
+
+    // Build a debug info table entry.
     if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
       V = BCI->getOperand(0);
     AllocaInst *AI = dyn_cast<AllocaInst>(V);
@@ -3999,6 +4023,14 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
   case Intrinsic::pow:
     visitPow(I);
     return 0;
+  case Intrinsic::convert_to_fp16:
+    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+                             MVT::i16, getValue(I.getOperand(1))));
+    return 0;
+  case Intrinsic::convert_from_fp16:
+    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+                             MVT::f32, getValue(I.getOperand(1))));
+    return 0;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getOperand(1));
     DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
@@ -4301,7 +4333,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
   const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   const Type *RetTy = FTy->getReturnType();
   MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
-  unsigned BeginLabel = 0, EndLabel = 0;
+  MCSymbol *BeginLabel = 0;
 
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
@@ -4361,7 +4393,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
   if (LandingPad && MMI) {
     // Insert a label before the invoke call to mark the try range.  This can be
     // used to detect deletion of the invoke via the MachineModuleInfo.
-    BeginLabel = MMI->NextLabelID();
+    BeginLabel = MMI->getContext().CreateTempSymbol();
 
     // For SjLj, keep track of which landing pads go with which invokes
     // so as to maintain the ordering of pads in the LSDA.
@@ -4375,8 +4407,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
     // Both PendingLoads and PendingExports must be flushed here;
     // this call might not return.
     (void)getRoot();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getControlRoot(), BeginLabel));
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
   }
 
   // Check if target-independent constraints permit a tail call here.
@@ -4464,9 +4495,8 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
   if (LandingPad && MMI) {
     // Insert a label at the end of the invoke call to mark the try range.  This
     // can be used to detect deletion of the invoke via the MachineModuleInfo.
-    EndLabel = MMI->NextLabelID();
-    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
-                             getRoot(), EndLabel));
+    MCSymbol *EndLabel = MMI->getContext().CreateTempSymbol();
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
 
     // Inform MachineModuleInfo of range.
     MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
@@ -4632,7 +4662,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
     // can't be a library call.
     if (!F->hasLocalLinkage() && F->hasName()) {
       StringRef Name = F->getName();
-      if (Name == "copysign" || Name == "copysignf") {
+      if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
         if (I.getNumOperands() == 3 &&   // Basic sanity checks.
             I.getOperand(1)->getType()->isFloatingPointTy() &&
             I.getType() == I.getOperand(1)->getType() &&
@@ -5777,12 +5807,6 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
          "LowerCall emitted a return value for a tail call!");
   assert((isTailCall || InVals.size() == Ins.size()) &&
          "LowerCall didn't emit the correct number of values!");
-  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
-          assert(InVals[i].getNode() &&
-                 "LowerCall emitted a null value!");
-          assert(Ins[i].VT == InVals[i].getValueType() &&
-                 "LowerCall emitted a value with the wrong type!");
-        });
 
   // For a tail call, the return value is merely live-out and there aren't
   // any nodes in the DAG representing it. Return a special value to
@@ -5793,6 +5817,13 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
     return std::make_pair(SDValue(), SDValue());
   }
 
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerCall emitted a null value!");
+          assert(Ins[i].VT == InVals[i].getValueType() &&
+                 "LowerCall emitted a value with the wrong type!");
+        });
+
   // Collect the legal value parts into potentially illegal values
   // that correspond to the original function's return values.
   ISD::NodeType AssertOp = ISD::DELETED_NODE;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index a82f0f7..cbbe431 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -368,8 +368,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
 static void SetDebugLoc(unsigned MDDbgKind, Instruction *I,
                         SelectionDAGBuilder *SDB,
                         FastISel *FastIS, MachineFunction *MF) {
-  if (isa<DbgInfoIntrinsic>(I)) return;
-  
   if (MDNode *Dbg = I->getMetadata(MDDbgKind)) {
     DILocation DILoc(Dbg);
     DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
@@ -446,12 +444,25 @@ namespace {
 /// nodes from the worklist.
 class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
   SmallVector<SDNode*, 128> &Worklist;
+  SmallPtrSet<SDNode*, 128> &InWorklist;
 public:
-  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl,
+                       SmallPtrSet<SDNode*, 128> &inwl)
+    : Worklist(wl), InWorklist(inwl) {}
 
+  void RemoveFromWorklist(SDNode *N) {
+    if (!InWorklist.erase(N)) return;
+    
+    SmallVector<SDNode*, 128>::iterator I =
+    std::find(Worklist.begin(), Worklist.end(), N);
+    assert(I != Worklist.end() && "Not in worklist");
+    
+    *I = Worklist.back();
+    Worklist.pop_back();
+  }
+  
   virtual void NodeDeleted(SDNode *N, SDNode *E) {
-    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
-                   Worklist.end());
+    RemoveFromWorklist(N);
   }
 
   virtual void NodeUpdated(SDNode *N) {
@@ -480,70 +491,79 @@ static bool TrivialTruncElim(SDValue Op,
 /// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
 void SelectionDAGISel::ShrinkDemandedOps() {
   SmallVector<SDNode*, 128> Worklist;
+  SmallPtrSet<SDNode*, 128> InWorklist;
 
   // Add all the dag nodes to the worklist.
   Worklist.reserve(CurDAG->allnodes_size());
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
-       E = CurDAG->allnodes_end(); I != E; ++I)
+       E = CurDAG->allnodes_end(); I != E; ++I) {
     Worklist.push_back(I);
-
-  APInt Mask;
-  APInt KnownZero;
-  APInt KnownOne;
+    InWorklist.insert(I);
+  }
 
   TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
   while (!Worklist.empty()) {
     SDNode *N = Worklist.pop_back_val();
+    InWorklist.erase(N);
 
     if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      // Deleting this node may make its operands dead, add them to the worklist
+      // if they aren't already there.
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+        if (InWorklist.insert(N->getOperand(i).getNode()))
+          Worklist.push_back(N->getOperand(i).getNode());
+      
       CurDAG->DeleteNode(N);
       continue;
     }
 
     // Run ShrinkDemandedOp on scalar binary operations.
-    if (N->getNumValues() == 1 &&
-        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
-      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-      APInt Demanded = APInt::getAllOnesValue(BitWidth);
-      APInt KnownZero, KnownOne;
-      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
-                                   KnownZero, KnownOne, TLO) ||
-          (N->getOpcode() == ISD::TRUNCATE &&
-           TrivialTruncElim(SDValue(N, 0), TLO))) {
-        // Revisit the node.
-        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
-                       Worklist.end());
-        Worklist.push_back(N);
-
-        // Replace the old value with the new one.
-        DEBUG(errs() << "\nReplacing "; 
-              TLO.Old.getNode()->dump(CurDAG);
-              errs() << "\nWith: ";
-              TLO.New.getNode()->dump(CurDAG);
-              errs() << '\n');
-
-        Worklist.push_back(TLO.New.getNode());
-
-        SDOPsWorkListRemover DeadNodes(Worklist);
-        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
-
-        if (TLO.Old.getNode()->use_empty()) {
-          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
-               i != e; ++i) {
-            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
-            if (OpNode->hasOneUse()) {
-              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
-                                         OpNode), Worklist.end());
-              Worklist.push_back(OpNode);
-            }
-          }
+    if (N->getNumValues() != 1 ||
+        !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
+      continue;
+    
+    unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+    APInt Demanded = APInt::getAllOnesValue(BitWidth);
+    APInt KnownZero, KnownOne;
+    if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                  KnownZero, KnownOne, TLO) &&
+        (N->getOpcode() != ISD::TRUNCATE ||
+         !TrivialTruncElim(SDValue(N, 0), TLO)))
+      continue;
+    
+    // Revisit the node.
+    assert(!InWorklist.count(N) && "Already in worklist");
+    Worklist.push_back(N);
+    InWorklist.insert(N);
 
-          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
-                                     TLO.Old.getNode()), Worklist.end());
-          CurDAG->DeleteNode(TLO.Old.getNode());
-        }
+    // Replace the old value with the new one.
+    DEBUG(errs() << "\nShrinkDemandedOps replacing "; 
+          TLO.Old.getNode()->dump(CurDAG);
+          errs() << "\nWith: ";
+          TLO.New.getNode()->dump(CurDAG);
+          errs() << '\n');
+
+    if (InWorklist.insert(TLO.New.getNode()))
+      Worklist.push_back(TLO.New.getNode());
+
+    SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
+    CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+    if (!TLO.Old.getNode()->use_empty()) continue;
+        
+    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+         i != e; ++i) {
+      SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+      if (OpNode->hasOneUse()) {
+        // Add OpNode to the end of the list to revisit.
+        DeadNodes.RemoveFromWorklist(OpNode);
+        Worklist.push_back(OpNode);
+        InWorklist.insert(OpNode);
       }
     }
+
+    DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
+    CurDAG->DeleteNode(TLO.Old.getNode());
   }
 }
 
@@ -715,13 +735,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
   DEBUG(CurDAG->dump());
 
-  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
-
   if (OptLevel != CodeGenOpt::None) {
     ShrinkDemandedOps();
     ComputeLiveOutVRegInfo();
   }
 
+  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
   // Third, instruction select all of the operations to machine code, adding the
   // code to the MachineBasicBlock.
   if (TimePassesIsEnabled) {
@@ -879,10 +899,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
     if (MMI && BB->isLandingPad()) {
       // Add a label to mark the beginning of the landing pad.  Deletion of the
       // landing pad can thus be detected via the MachineModuleInfo.
-      unsigned LabelID = MMI->addLandingPad(BB);
+      MCSymbol *Label = MMI->addLandingPad(BB);
 
       const TargetInstrDesc &II = TII.get(TargetOpcode::EH_LABEL);
-      BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID);
+      BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
 
       // Mark exception register as live in.
       unsigned Reg = TLI.getExceptionAddressRegister();
@@ -1517,14 +1537,6 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
   return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
 }
 
-SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
-  SDValue Chain = N->getOperand(0);
-  unsigned C = cast<LabelSDNode>(N)->getLabelID();
-  SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
-  return CurDAG->SelectNodeTo(N, TargetOpcode::EH_LABEL,
-                              MVT::Other, Tmp, Chain);
-}
-
 /// GetVBR - decode a vbr encoding whose top bit is set.
 ALWAYS_INLINE static uint64_t
 GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
@@ -1651,7 +1663,8 @@ WalkChainUsers(SDNode *ChainedNode,
     
     if (User->getOpcode() == ISD::CopyToReg ||
         User->getOpcode() == ISD::CopyFromReg ||
-        User->getOpcode() == ISD::INLINEASM) {
+        User->getOpcode() == ISD::INLINEASM ||
+        User->getOpcode() == ISD::EH_LABEL) {
       // If their node ID got reset to -1 then they've already been selected.
       // Treat them like a MachineOpcode.
       if (User->getNodeId() == -1)
@@ -2042,6 +2055,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::EntryToken:       // These nodes remain the same.
   case ISD::BasicBlock:
   case ISD::Register:
+  //case ISD::VALUETYPE:
+  //case ISD::CONDCODE:
   case ISD::HANDLENODE:
   case ISD::TargetConstant:
   case ISD::TargetConstantFP:
@@ -2055,6 +2070,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
   case ISD::TokenFactor:
   case ISD::CopyFromReg:
   case ISD::CopyToReg:
+  case ISD::EH_LABEL:
     NodeToMatch->setNodeId(-1); // Mark selected.
     return 0;
   case ISD::AssertSext:
@@ -2063,7 +2079,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
                                       NodeToMatch->getOperand(0));
     return 0;
   case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
-  case ISD::EH_LABEL:  return Select_EH_LABEL(NodeToMatch);
   case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);
   }
   
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0e54ca4..f7ef2d6 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -174,7 +174,13 @@ static void InitLibcallNames(const char **Names) {
   Names[RTLIB::FLOOR_F64] = "floor";
   Names[RTLIB::FLOOR_F80] = "floorl";
   Names[RTLIB::FLOOR_PPCF128] = "floorl";
+  Names[RTLIB::COPYSIGN_F32] = "copysignf";
+  Names[RTLIB::COPYSIGN_F64] = "copysign";
+  Names[RTLIB::COPYSIGN_F80] = "copysignl";
+  Names[RTLIB::COPYSIGN_PPCF128] = "copysignl";
   Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2";
+  Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee";
+  Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee";
   Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2";
   Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2";
   Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
@@ -269,6 +275,7 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
     if (RetVT == MVT::f64)
       return FPEXT_F32_F64;
   }
+
   return UNKNOWN_LIBCALL;
 }
 
@@ -288,6 +295,7 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
     if (OpVT == MVT::ppcf128)
       return FPROUND_PPCF128_F64;
   }
+
   return UNKNOWN_LIBCALL;
 }
 
@@ -792,20 +800,6 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
   return 1;
 }
 
-/// getWidenVectorType: given a vector type, returns the type to widen to
-/// (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
-/// If there is no vector type that we want to widen to, returns MVT::Other
-/// When and where to widen is target dependent based on the cost of
-/// scalarizing vs using the wider vector type.
-EVT TargetLowering::getWidenVectorType(EVT VT) const {
-  assert(VT.isVector());
-  if (isTypeLegal(VT))
-    return VT;
- 
-  // Default is not to widen until moved to LegalizeTypes
-  return MVT::Other;
-}
-
 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 /// function arguments in the caller parameter area.  This is the actual
 /// alignment, not its logarithm.
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index d25df1d..5c62118 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -1260,10 +1260,10 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
   RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
   assert(RealDstReg && "Invalid extract_subreg instruction!");
 
+  LiveInterval &RHS = li_->getInterval(SrcReg);
   // For this type of EXTRACT_SUBREG, conservatively
   // check if the live interval of the source register interfere with the
   // actual super physical register we are trying to coalesce with.
-  LiveInterval &RHS = li_->getInterval(SrcReg);
   if (li_->hasInterval(RealDstReg) &&
       RHS.overlaps(li_->getInterval(RealDstReg))) {
     DEBUG({
@@ -1273,7 +1273,11 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check DstReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != DstReg &&
+        !tri_->isSubRegister(DstReg, *SR) &&
+        li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1294,9 +1298,9 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
   RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
   assert(RealSrcReg && "Invalid extract_subreg instruction!");
 
-  LiveInterval &RHS = li_->getInterval(DstReg);
+  LiveInterval &LHS = li_->getInterval(DstReg);
   if (li_->hasInterval(RealSrcReg) &&
-      RHS.overlaps(li_->getInterval(RealSrcReg))) {
+      LHS.overlaps(li_->getInterval(RealSrcReg))) {
     DEBUG({
         dbgs() << "Interfere with register ";
         li_->getInterval(RealSrcReg).print(dbgs(), tri_);
@@ -1304,7 +1308,11 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
-    if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
+    // Do not check SrcReg or its sub-register. JoinIntervals() will take care
+    // of that.
+    if (*SR != SrcReg &&
+        !tri_->isSubRegister(SrcReg, *SR) &&
+        li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
           dbgs() << "Interfere with sub-register ";
           li_->getInterval(*SR).print(dbgs(), tri_);
@@ -1476,6 +1484,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
         return false; // Not coalescable.
       }
 
+      // FIXME: The following checks are somewhat conservative. Perhaps a better
+      // way to implement this is to treat this as coalescing a vr with the
+      // super physical register.
       if (isExtSubReg) {
         if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
           return false; // Not coalescable
@@ -2205,7 +2216,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
         li_->intervalIsInOneMBB(RHS) &&
         li_->getApproximateInstructionCount(RHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
@@ -2222,7 +2233,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
     if (LHS.containsOneValue() &&
         li_->getApproximateInstructionCount(LHS) <= 10) {
       // Perform a more exhaustive check for some common cases.
-      if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
+      if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
         return false;
     } else {
       for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3223e53..fa3785d 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -648,17 +648,6 @@ void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
   while (!MBB->succ_empty())
     MBB->removeSuccessor(MBB->succ_end()-1);
 
-  // If there are any labels in the basic block, unregister them from
-  // MachineModuleInfo.
-  if (MMI && !MBB->empty()) {
-    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
-         I != E; ++I) {
-      if (I->isLabel())
-        // The label ID # is always operand #0, an immediate.
-        MMI->InvalidateLabel(I->getOperand(0).getImm());
-    }
-  }
-
   // Remove the block.
   MBB->eraseFromParent();
 }
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 2014b42..b62cca3 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -53,11 +53,13 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
   ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)UniquingMap;
 
   // Do the lookup, if we have a hit, return it.
-  const MCSectionELF *&Entry = Map[Section];
-  if (Entry) return Entry;
+  StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
+  if (Entry.getValue()) return Entry.getValue();
 
-  return Entry = MCSectionELF::Create(Section, Type, Flags, Kind, IsExplicit,
-                                      getContext());
+  MCSectionELF *Result = MCSectionELF::Create(Entry.getKey(), Type, Flags, Kind,
+                                              IsExplicit, getContext());
+  Entry.setValue(Result);
+  return Result;
 }
 
 void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
@@ -306,9 +308,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   // into a 'uniqued' section name, create and return the section now.
   if (GV->isWeakForLinker() && !Kind.isCommon() && !Kind.isBSS()) {
     const char *Prefix = getSectionPrefixForUniqueGlobal(Kind);
-    SmallString<128> Name;
-    Name.append(Prefix, Prefix+strlen(Prefix));
-    Mang->getNameWithPrefix(Name, GV, false);
+    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
     return getELFSection(Name.str(), getELFSectionType(Name.str(), Kind),
                          getELFSectionFlags(Kind), Kind);
   }
@@ -391,8 +393,9 @@ getSectionForConstant(SectionKind Kind) const {
 }
 
 const MCExpr *TargetLoweringObjectFileELF::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                             MachineModuleInfo *MMI, unsigned Encoding) const {
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI,
+                               unsigned Encoding, MCStreamer &Streamer) const {
 
   if (Encoding & dwarf::DW_EH_PE_indirect) {
     MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -403,24 +406,20 @@ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 
     // Add information about the stub reference to ELFMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *Sym = getContext().GetOrCreateTemporarySymbol(Name.str());
-    MCSymbol *&StubSym = ELFMMI.getGVStubEntry(Sym);
-    if (StubSym == 0) {
-      Name.clear();
-      Mang->getNameWithPrefix(Name, GV, false);
-      if (GV->hasPrivateLinkage())
-        StubSym = getContext().GetOrCreateTemporarySymbol(Name.str());
-      else
-        StubSym = getContext().GetOrCreateSymbol(Name.str());
+    MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
     return TargetLoweringObjectFile::
-      getSymbolForDwarfReference(Sym, MMI,
-                                 Encoding & ~dwarf::DW_EH_PE_indirect);
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
 //===----------------------------------------------------------------------===//
@@ -561,8 +560,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
   }
 
   // Exception Handling.
-  LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
-                                SectionKind::getDataRel());
+  LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
+                                SectionKind::getReadOnlyWithRel());
   EHFrameSection =
     getMachOSection("__TEXT", "__eh_frame",
                     MCSectionMachO::S_COALESCED |
@@ -736,9 +735,8 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
     // FIXME: ObjC metadata is currently emitted as internal symbols that have
     // \1L and \0l prefixes on them.  Fix them to be Private/LinkerPrivate and
     // this horrible hack can go away.
-    SmallString<64> Name;
-    Mang->getNameWithPrefix(Name, GV, false);
-    if (Name[0] == 'L' || Name[0] == 'l')
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
       return false;
   }
 
@@ -746,8 +744,9 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
 }
 
 const MCExpr *TargetLoweringObjectFileMachO::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                             MachineModuleInfo *MMI, unsigned Encoding) const {
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI, unsigned Encoding,
+                               MCStreamer &Streamer) const {
   // The mach-o version of this method defaults to returning a stub reference.
 
   if (Encoding & DW_EH_PE_indirect) {
@@ -760,24 +759,20 @@ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
 
     // Add information about the stub reference to MachOMMI so that the stub
     // gets emitted by the asmprinter.
-    MCSymbol *Sym = getContext().GetOrCreateTemporarySymbol(Name.str());
-    MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
-    if (StubSym == 0) {
-      Name.clear();
-      Mang->getNameWithPrefix(Name, GV, false);
-      if (GV->hasPrivateLinkage())
-        StubSym = getContext().GetOrCreateTemporarySymbol(Name.str());
-      else
-        StubSym = getContext().GetOrCreateSymbol(Name.str());
+    MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
 
     return TargetLoweringObjectFile::
-      getSymbolForDwarfReference(Sym, MMI,
-                                 Encoding & ~dwarf::DW_EH_PE_indirect);
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
   }
 
   return TargetLoweringObjectFile::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
 unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
@@ -793,7 +788,7 @@ unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
 }
 
 unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
-  return DW_EH_PE_absptr;
+  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
 }
 
 //===----------------------------------------------------------------------===//
@@ -904,7 +899,8 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if (GV->isWeakForLinker()) {
     const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-    Mang->getNameWithPrefix(Name, GV, false);
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
     return getCOFFSection(Name.str(), false, Kind);
   }
 
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index b0f0a07..7b33812 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -165,20 +165,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
   }
 
   // Actually remove the blocks now.
-  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = DeadBlocks[i];
-    // If there are any labels in the basic block, unregister them from
-    // MachineModuleInfo.
-    if (MMI && !MBB->empty()) {
-      for (MachineBasicBlock::iterator I = MBB->begin(),
-             E = MBB->end(); I != E; ++I) {
-        if (I->isLabel())
-          // The label ID # is always operand #0, an immediate.
-          MMI->InvalidateLabel(I->getOperand(0).getImm());
-      }
-    }
-    MBB->eraseFromParent();
-  }
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+    DeadBlocks[i]->eraseFromParent();
 
   // Cleanup PHI nodes.
   for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 7aa0a91..44d5311 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -98,7 +98,7 @@ struct TrivialRewriter : public VirtRegRewriter {
   bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
                             LiveIntervals* LIs) {
     DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
-    DEBUG(dbgs() << "********** Function: " 
+    DEBUG(dbgs() << "********** Function: "
           << MF.getFunction()->getName() << '\n');
     DEBUG(dbgs() << "**** Machine Instrs"
           << "(NOTE! Does not include spills and reloads!) ****\n");
@@ -135,10 +135,10 @@ struct TrivialRewriter : public VirtRegRewriter {
         changed |= !reglist.empty();
       }
     }
-    
+
     DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
     DEBUG(MF.dump());
-    
+
     return changed;
   }
 
@@ -208,7 +208,7 @@ public:
   /// in the specified physreg.  If CanClobber is true, the physreg can be
   /// modified at any time without changing the semantics of the program.
   void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
-    // If this stack slot is thought to be available in some other physreg, 
+    // If this stack slot is thought to be available in some other physreg,
     // remove its record.
     ModifyStackSlotOrReMat(SlotOrReMat);
 
@@ -364,7 +364,7 @@ struct ReusedOp {
 
   // AssignedPhysReg - The physreg that was assigned for use by the reload.
   unsigned AssignedPhysReg;
-  
+
   // VirtReg - The virtual register itself.
   unsigned VirtReg;
 
@@ -384,11 +384,11 @@ public:
   ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
     PhysRegsClobbered.resize(tri->getNumRegs());
   }
-  
+
   bool hasReuses() const {
     return !Reuses.empty();
   }
-  
+
   /// addReuse - If we choose to reuse a virtual register that is already
   /// available instead of reloading it, remember that we did so.
   void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
@@ -397,9 +397,9 @@ public:
     // If the reload is to the assigned register anyway, no undo will be
     // required.
     if (PhysRegReused == AssignedPhysReg) return;
-    
+
     // Otherwise, remember this.
-    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, 
+    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
                               AssignedPhysReg, VirtReg));
   }
 
@@ -410,10 +410,10 @@ public:
   bool isClobbered(unsigned PhysReg) const {
     return PhysRegsClobbered.test(PhysReg);
   }
-  
+
   /// GetRegForReload - We are about to emit a reload into PhysReg.  If there
   /// is some other operand that is using the specified register, either pick
-  /// a new register to use, or evict the previous reload and use this reg. 
+  /// a new register to use, or evict the previous reload and use this reg.
   unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
                            MachineFunction &MF, MachineInstr *MI,
                            AvailableSpills &Spills,
@@ -525,7 +525,7 @@ static void InvalidateKills(MachineInstr &MI,
 /// reference.
 static bool InvalidateRegDef(MachineBasicBlock::iterator I,
                              MachineInstr &NewDef, unsigned Reg,
-                             bool &HasLiveDef, 
+                             bool &HasLiveDef,
                              const TargetRegisterInfo *TRI) {
   // Due to remat, it's possible this reg isn't being reused. That is,
   // the def of this reg (by prev MI) is now dead.
@@ -579,7 +579,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
     unsigned Reg = MO.getReg();
     if (Reg == 0)
       continue;
-    
+
     if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
       // That can't be right. Register is killed but not re-defined and it's
       // being reused. Let's fix that.
@@ -597,7 +597,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
       }
     } else {
       // Check for subreg kills as well.
-      // d4 = 
+      // d4 =
       // store d4, fi#0
       // ...
       //    = s8<kill>
@@ -802,7 +802,7 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
   if (It == SpillSlotsOrReMatsAvailable.end()) return;
   unsigned Reg = It->second >> 1;
   SpillSlotsOrReMatsAvailable.erase(It);
-  
+
   // This register may hold the value of multiple stack slots, only remove this
   // stack slot from the set of values the register contains.
   std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
@@ -832,7 +832,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
                          VirtRegMap &VRM) {
   const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
   const TargetRegisterInfo *TRI = Spills.getRegInfo();
-  
+
   if (Reuses.empty()) return PhysReg;  // This is most often empty.
 
   for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
@@ -853,7 +853,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
     } else {
       // Otherwise, we might also have a problem if a previously reused
       // value aliases the new register. If so, codegen the previous reload
-      // and use this one.          
+      // and use this one.
       unsigned PRRU = Op.PhysRegReused;
       if (TRI->regsOverlap(PRRU, PhysReg)) {
         // Okay, we found out that an alias of a reused register
@@ -900,13 +900,13 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
         if (DoReMat) {
           ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
                         TRI, VRM);
-        } else { 
+        } else {
           TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
                                     NewOp.StackSlotOrReMat, AliasRC);
           MachineInstr *LoadMI = prior(InsertLoc);
           VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
           // Any stores to this stack slot are not dead anymore.
-          MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;            
+          MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
           ++NumLoads;
         }
         Spills.ClobberPhysReg(NewPhysReg);
@@ -919,10 +919,10 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
         Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
         UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
         DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-        
+
         DEBUG(dbgs() << "Reuse undone!\n");
         --NumReused;
-        
+
         // Finally, PhysReg is now available, go ahead and use it.
         return PhysReg;
       }
@@ -1037,1410 +1037,1476 @@ void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
 }
 
 namespace {
-  struct RefSorter {
-    bool operator()(const std::pair<MachineInstr*, int> &A,
-                    const std::pair<MachineInstr*, int> &B) {
-      return A.second < B.second;
-    }
-  };
-}
+
+struct RefSorter {
+  bool operator()(const std::pair<MachineInstr*, int> &A,
+                  const std::pair<MachineInstr*, int> &B) {
+    return A.second < B.second;
+  }
+};
 
 // ***************************** //
 // Local Spiller Implementation  //
 // ***************************** //
 
-namespace {
-
 class LocalRewriter : public VirtRegRewriter {
-  MachineRegisterInfo *RegInfo;
+  MachineRegisterInfo *MRI;
   const TargetRegisterInfo *TRI;
   const TargetInstrInfo *TII;
+  VirtRegMap *VRM;
   BitVector AllocatableRegs;
   DenseMap<MachineInstr*, unsigned> DistanceMap;
-public:
-
-  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
-                            LiveIntervals* LIs) {
-    RegInfo = &MF.getRegInfo(); 
-    TRI = MF.getTarget().getRegisterInfo();
-    TII = MF.getTarget().getInstrInfo();
-    AllocatableRegs = TRI->getAllocatableSet(MF);
-    DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
-          << MF.getFunction()->getName() << "':\n");
-    DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
-                    " reloads!) ****\n");
-    DEBUG(MF.dump());
 
-    // Spills - Keep track of which spilled values are available in physregs
-    // so that we can choose to reuse the physregs instead of emitting
-    // reloads. This is usually refreshed per basic block.
-    AvailableSpills Spills(TRI, TII);
-
-    // Keep track of kill information.
-    BitVector RegKills(TRI->getNumRegs());
-    std::vector<MachineOperand*> KillOps;
-    KillOps.resize(TRI->getNumRegs(), NULL);
-
-    // SingleEntrySuccs - Successor blocks which have a single predecessor.
-    SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
-    SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
-
-    // Traverse the basic blocks depth first.
-    MachineBasicBlock *Entry = MF.begin();
-    SmallPtrSet<MachineBasicBlock*,16> Visited;
-    for (df_ext_iterator<MachineBasicBlock*,
-           SmallPtrSet<MachineBasicBlock*,16> >
-           DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
-         DFI != E; ++DFI) {
-      MachineBasicBlock *MBB = *DFI;
-      if (!EarlyVisited.count(MBB))
-        RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
-
-      // If this MBB is the only predecessor of a successor. Keep the
-      // availability information and visit it next.
-      do {
-        // Keep visiting single predecessor successor as long as possible.
-        SinglePredSuccs.clear();
-        findSinglePredSuccessor(MBB, SinglePredSuccs);
-        if (SinglePredSuccs.empty())
-          MBB = 0;
-        else {
-          // FIXME: More than one successors, each of which has MBB has
-          // the only predecessor.
-          MBB = SinglePredSuccs[0];
-          if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
-            Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
-            RewriteMBB(*MBB, VRM, LIs, Spills, RegKills, KillOps);
-          }
-        }
-      } while (MBB);
-
-      // Clear the availability info.
-      Spills.clear();
-    }
+  MachineBasicBlock *MBB;       // Basic block currently being processed.
 
-    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
-    DEBUG(MF.dump());
-
-    // Mark unused spill slots.
-    MachineFrameInfo *MFI = MF.getFrameInfo();
-    int SS = VRM.getLowSpillSlot();
-    if (SS != VirtRegMap::NO_STACK_SLOT)
-      for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS)
-        if (!VRM.isSpillSlotUsed(SS)) {
-          MFI->RemoveStackObject(SS);
-          ++NumDSS;
-        }
+public:
 
-    return true;
-  }
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs);
 
 private:
 
-  /// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
-  /// a scratch register is available.
-  ///     xorq  %r12<kill>, %r13
-  ///     addq  %rax, -184(%rbp)
-  ///     addq  %r13, -184(%rbp)
-  /// ==>
-  ///     xorq  %r12<kill>, %r13
-  ///     movq  -184(%rbp), %r12
-  ///     addq  %rax, %r12
-  ///     addq  %r13, %r12
-  ///     movq  %r12, -184(%rbp)
   bool OptimizeByUnfold2(unsigned VirtReg, int SS,
-                         MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MII,
                          std::vector<MachineInstr*> &MaybeDeadStores,
                          AvailableSpills &Spills,
                          BitVector &RegKills,
-                         std::vector<MachineOperand*> &KillOps,
-                         VirtRegMap &VRM) {
+                         std::vector<MachineOperand*> &KillOps);
 
-    MachineBasicBlock::iterator NextMII = llvm::next(MII);
-    if (NextMII == MBB.end())
-      return false;
+  bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                        std::vector<MachineInstr*> &MaybeDeadStores,
+                        AvailableSpills &Spills,
+                        BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
 
-    if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
-      return false;
+  bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                           unsigned VirtReg, unsigned SrcReg, int SS,
+                           AvailableSpills &Spills,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           const TargetRegisterInfo *TRI);
 
-    // Now let's see if the last couple of instructions happens to have freed up
-    // a register.
-    const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-    unsigned PhysReg = FindFreeRegister(MII, MBB, RC, TRI, AllocatableRegs);
-    if (!PhysReg)
-      return false;
+  void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                           int Idx, unsigned PhysReg, int StackSlot,
+                           const TargetRegisterClass *RC,
+                           bool isAvailable, MachineInstr *&LastStore,
+                           AvailableSpills &Spills,
+                           SmallSet<MachineInstr*, 4> &ReMatDefs,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps);
 
-    MachineFunction &MF = *MBB.getParent();
-    TRI = MF.getTarget().getRegisterInfo();
-    MachineInstr &MI = *MII;
-    if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, VRM))
-      return false;
+  void TransferDeadness(unsigned Reg, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
 
-    // If the next instruction also folds the same SS modref and can be unfoled,
-    // then it's worthwhile to issue a load from SS into the free register and
-    // then unfold these instructions.
-    if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM))
-      return false;
+  bool InsertEmergencySpills(MachineInstr *MI);
 
-    // Back-schedule reloads and remats.
-    ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, false, SS, TII, MF);
+  bool InsertRestores(MachineInstr *MI,
+                      AvailableSpills &Spills,
+                      BitVector &RegKills,
+                      std::vector<MachineOperand*> &KillOps);
 
-    // Load from SS to the spare physical register.
-    TII->loadRegFromStackSlot(MBB, MII, PhysReg, SS, RC);
-    // This invalidates Phys.
-    Spills.ClobberPhysReg(PhysReg);
-    // Remember it's available.
-    Spills.addAvailable(SS, PhysReg);
-    MaybeDeadStores[SS] = NULL;
+  bool InsertSpills(MachineInstr *MI);
 
-    // Unfold current MI.
-    SmallVector<MachineInstr*, 4> NewMIs;
-    if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+  void RewriteMBB(LiveIntervals *LIs,
+                  AvailableSpills &Spills, BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+                                         LiveIntervals* LIs) {
+  MRI = &MF.getRegInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
+  VRM = &vrm;
+  AllocatableRegs = TRI->getAllocatableSet(MF);
+  DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+        << MF.getFunction()->getName() << "':\n");
+  DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+        " reloads!) ****\n");
+  DEBUG(MF.dump());
+
+  // Spills - Keep track of which spilled values are available in physregs
+  // so that we can choose to reuse the physregs instead of emitting
+  // reloads. This is usually refreshed per basic block.
+  AvailableSpills Spills(TRI, TII);
+
+  // Keep track of kill information.
+  BitVector RegKills(TRI->getNumRegs());
+  std::vector<MachineOperand*> KillOps;
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  // SingleEntrySuccs - Successor blocks which have a single predecessor.
+  SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+  SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
+
+  // Traverse the basic blocks depth first.
+  MachineBasicBlock *Entry = MF.begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*,
+         SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MBB = *DFI;
+    if (!EarlyVisited.count(MBB))
+      RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+    // If this MBB is the only predecessor of a successor. Keep the
+    // availability information and visit it next.
+    do {
+      // Keep visiting single predecessor successor as long as possible.
+      SinglePredSuccs.clear();
+      findSinglePredSuccessor(MBB, SinglePredSuccs);
+      if (SinglePredSuccs.empty())
+        MBB = 0;
+      else {
+        // FIXME: More than one successors, each of which has MBB has
+        // the only predecessor.
+        MBB = SinglePredSuccs[0];
+        if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+          Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+          RewriteMBB(LIs, Spills, RegKills, KillOps);
+        }
+      }
+    } while (MBB);
+
+    // Clear the availability info.
+    Spills.clear();
+  }
+
+  DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+  DEBUG(MF.dump());
+
+  // Mark unused spill slots.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  int SS = VRM->getLowSpillSlot();
+  if (SS != VirtRegMap::NO_STACK_SLOT)
+    for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS)
+      if (!VRM->isSpillSlotUsed(SS)) {
+        MFI->RemoveStackObject(SS);
+        ++NumDSS;
+      }
+
+  return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+///     xorq  %r12<kill>, %r13
+///     addq  %rax, -184(%rbp)
+///     addq  %r13, -184(%rbp)
+/// ==>
+///     xorq  %r12<kill>, %r13
+///     movq  -184(%rbp), %r12
+///     addq  %rax, %r12
+///     addq  %r13, %r12
+///     movq  %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+                  MachineBasicBlock::iterator &MII,
+                  std::vector<MachineInstr*> &MaybeDeadStores,
+                  AvailableSpills &Spills,
+                  BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator NextMII = llvm::next(MII);
+  if (NextMII == MBB->end())
+    return false;
+
+  if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+    return false;
+
+  // Now let's see if the last couple of instructions happens to have freed up
+  // a register.
+  const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+  unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+  if (!PhysReg)
+    return false;
+
+  MachineFunction &MF = *MBB->getParent();
+  TRI = MF.getTarget().getRegisterInfo();
+  MachineInstr &MI = *MII;
+  if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // If the next instruction also folds the same SS modref and can be unfoled,
+  // then it's worthwhile to issue a load from SS into the free register and
+  // then unfold these instructions.
+  if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // Back-schedule reloads and remats.
+  ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+  // Load from SS to the spare physical register.
+  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC);
+  // This invalidates Phys.
+  Spills.ClobberPhysReg(PhysReg);
+  // Remember it's available.
+  Spills.addAvailable(SS, PhysReg);
+  MaybeDeadStores[SS] = NULL;
+
+  // Unfold current MI.
+  SmallVector<MachineInstr*, 4> NewMIs;
+  if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+    llvm_unreachable("Unable unfold the load / store folding instruction!");
+  assert(NewMIs.size() == 1);
+  AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+  VRM->transferRestorePts(&MI, NewMIs[0]);
+  MII = MBB->insert(MII, NewMIs[0]);
+  InvalidateKills(MI, TRI, RegKills, KillOps);
+  VRM->RemoveMachineInstrFromMaps(&MI);
+  MBB->erase(&MI);
+  ++NumModRefUnfold;
+
+  // Unfold next instructions that fold the same SS.
+  do {
+    MachineInstr &NextMI = *NextMII;
+    NextMII = llvm::next(NextMII);
+    NewMIs.clear();
+    if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
       llvm_unreachable("Unable unfold the load / store folding instruction!");
     assert(NewMIs.size() == 1);
     AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
-    VRM.transferRestorePts(&MI, NewMIs[0]);
-    MII = MBB.insert(MII, NewMIs[0]);
-    InvalidateKills(MI, TRI, RegKills, KillOps);
-    VRM.RemoveMachineInstrFromMaps(&MI);
-    MBB.erase(&MI);
+    VRM->transferRestorePts(&NextMI, NewMIs[0]);
+    MBB->insert(NextMII, NewMIs[0]);
+    InvalidateKills(NextMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(&NextMI);
+    MBB->erase(&NextMI);
     ++NumModRefUnfold;
+    if (NextMII == MBB->end())
+      break;
+  } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
 
-    // Unfold next instructions that fold the same SS.
-    do {
-      MachineInstr &NextMI = *NextMII;
-      NextMII = llvm::next(NextMII);
-      NewMIs.clear();
-      if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
-        llvm_unreachable("Unable unfold the load / store folding instruction!");
-      assert(NewMIs.size() == 1);
-      AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
-      VRM.transferRestorePts(&NextMI, NewMIs[0]);
-      MBB.insert(NextMII, NewMIs[0]);
-      InvalidateKills(NextMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(&NextMI);
-      MBB.erase(&NextMI);
-      ++NumModRefUnfold;
-      if (NextMII == MBB.end())
-        break;
-    } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, VRM));
-
-    // Store the value back into SS.
-    TII->storeRegToStackSlot(MBB, NextMII, PhysReg, true, SS, RC);
-    MachineInstr *StoreMI = prior(NextMII);
-    VRM.addSpillSlotUse(SS, StoreMI);
-    VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+  // Store the value back into SS.
+  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC);
+  MachineInstr *StoreMI = prior(NextMII);
+  VRM->addSpillSlotUse(SS, StoreMI);
+  VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
 
-    return true;
-  }
+  return true;
+}
 
-  /// OptimizeByUnfold - Turn a store folding instruction into a load folding
-  /// instruction. e.g.
-  ///     xorl  %edi, %eax
-  ///     movl  %eax, -32(%ebp)
-  ///     movl  -36(%ebp), %eax
-  ///     orl   %eax, -32(%ebp)
-  /// ==>
-  ///     xorl  %edi, %eax
-  ///     orl   -36(%ebp), %eax
-  ///     mov   %eax, -32(%ebp)
-  /// This enables unfolding optimization for a subsequent instruction which will
-  /// also eliminate the newly introduced store instruction.
-  bool OptimizeByUnfold(MachineBasicBlock &MBB,
-                        MachineBasicBlock::iterator &MII,
-                        std::vector<MachineInstr*> &MaybeDeadStores,
-                        AvailableSpills &Spills,
-                        BitVector &RegKills,
-                        std::vector<MachineOperand*> &KillOps,
-                        VirtRegMap &VRM) {
-    MachineFunction &MF = *MBB.getParent();
-    MachineInstr &MI = *MII;
-    unsigned UnfoldedOpc = 0;
-    unsigned UnfoldPR = 0;
-    unsigned UnfoldVR = 0;
-    int FoldedSS = VirtRegMap::NO_STACK_SLOT;
-    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
-    for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
-      // Only transform a MI that folds a single register.
-      if (UnfoldedOpc)
-        return false;
-      UnfoldVR = I->second.first;
-      VirtRegMap::ModRef MR = I->second.second;
-      // MI2VirtMap be can updated which invalidate the iterator.
-      // Increment the iterator first.
-      ++I; 
-      if (VRM.isAssignedReg(UnfoldVR))
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+///     xorl  %edi, %eax
+///     movl  %eax, -32(%ebp)
+///     movl  -36(%ebp), %eax
+///     orl   %eax, -32(%ebp)
+/// ==>
+///     xorl  %edi, %eax
+///     orl   -36(%ebp), %eax
+///     mov   %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                 std::vector<MachineInstr*> &MaybeDeadStores,
+                 AvailableSpills &Spills,
+                 BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  unsigned UnfoldedOpc = 0;
+  unsigned UnfoldPR = 0;
+  unsigned UnfoldVR = 0;
+  int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+    // Only transform a MI that folds a single register.
+    if (UnfoldedOpc)
+      return false;
+    UnfoldVR = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap be can updated which invalidate the iterator.
+    // Increment the iterator first.
+    ++I;
+    if (VRM->isAssignedReg(UnfoldVR))
+      continue;
+    // If this reference is not a use, any previous store is now dead.
+    // Otherwise, the store to this stack slot is not dead anymore.
+    FoldedSS = VRM->getStackSlot(UnfoldVR);
+    MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+    if (DeadStore && (MR & VirtRegMap::isModRef)) {
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+      if (!PhysReg || !DeadStore->readsRegister(PhysReg))
         continue;
-      // If this reference is not a use, any previous store is now dead.
-      // Otherwise, the store to this stack slot is not dead anymore.
-      FoldedSS = VRM.getStackSlot(UnfoldVR);
-      MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
-      if (DeadStore && (MR & VirtRegMap::isModRef)) {
-        unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
-        if (!PhysReg || !DeadStore->readsRegister(PhysReg))
-          continue;
-        UnfoldPR = PhysReg;
-        UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
-                                                      false, true);
-      }
+      UnfoldPR = PhysReg;
+      UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+                                                    false, true);
     }
+  }
 
-    if (!UnfoldedOpc) {
-      if (!UnfoldVR)
-        return false;
+  if (!UnfoldedOpc) {
+    if (!UnfoldVR)
+      return false;
 
-      // Look for other unfolding opportunities.
-      return OptimizeByUnfold2(UnfoldVR, FoldedSS, MBB, MII,
-                               MaybeDeadStores, Spills, RegKills, KillOps, VRM);
-    }
+    // Look for other unfolding opportunities.
+    return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
+                             RegKills, KillOps);
+  }
 
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI.getOperand(i);
-      if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
-        continue;
-      unsigned VirtReg = MO.getReg();
-      if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
-        continue;
-      if (VRM.isAssignedReg(VirtReg)) {
-        unsigned PhysReg = VRM.getPhys(VirtReg);
-        if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
-          return false;
-      } else if (VRM.isReMaterialized(VirtReg))
-        continue;
-      int SS = VRM.getStackSlot(VirtReg);
-      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-      if (PhysReg) {
-        if (TRI->regsOverlap(PhysReg, UnfoldPR))
-          return false;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+      continue;
+    unsigned VirtReg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+      continue;
+    if (VRM->isAssignedReg(VirtReg)) {
+      unsigned PhysReg = VRM->getPhys(VirtReg);
+      if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+    } else if (VRM->isReMaterialized(VirtReg))
+      continue;
+    int SS = VRM->getStackSlot(VirtReg);
+    unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+    if (PhysReg) {
+      if (TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+      continue;
+    }
+    if (VRM->hasPhys(VirtReg)) {
+      PhysReg = VRM->getPhys(VirtReg);
+      if (!TRI->regsOverlap(PhysReg, UnfoldPR))
         continue;
-      }
-      if (VRM.hasPhys(VirtReg)) {
-        PhysReg = VRM.getPhys(VirtReg);
-        if (!TRI->regsOverlap(PhysReg, UnfoldPR))
-          continue;
-      }
+    }
 
-      // Ok, we'll need to reload the value into a register which makes
-      // it impossible to perform the store unfolding optimization later.
-      // Let's see if it is possible to fold the load if the store is
-      // unfolded. This allows us to perform the store unfolding
-      // optimization.
-      SmallVector<MachineInstr*, 4> NewMIs;
-      if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
-        assert(NewMIs.size() == 1);
-        MachineInstr *NewMI = NewMIs.back();
-        NewMIs.clear();
-        int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
-        assert(Idx != -1);
-        SmallVector<unsigned, 1> Ops;
-        Ops.push_back(Idx);
-        MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
-        if (FoldedMI) {
-          VRM.addSpillSlotUse(SS, FoldedMI);
-          if (!VRM.hasPhys(UnfoldVR))
-            VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
-          VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
-          MII = MBB.insert(MII, FoldedMI);
-          InvalidateKills(MI, TRI, RegKills, KillOps);
-          VRM.RemoveMachineInstrFromMaps(&MI);
-          MBB.erase(&MI);
-          MF.DeleteMachineInstr(NewMI);
-          return true;
-        }
+    // Ok, we'll need to reload the value into a register which makes
+    // it impossible to perform the store unfolding optimization later.
+    // Let's see if it is possible to fold the load if the store is
+    // unfolded. This allows us to perform the store unfolding
+    // optimization.
+    SmallVector<MachineInstr*, 4> NewMIs;
+    if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+      assert(NewMIs.size() == 1);
+      MachineInstr *NewMI = NewMIs.back();
+      NewMIs.clear();
+      int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+      assert(Idx != -1);
+      SmallVector<unsigned, 1> Ops;
+      Ops.push_back(Idx);
+      MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+      if (FoldedMI) {
+        VRM->addSpillSlotUse(SS, FoldedMI);
+        if (!VRM->hasPhys(UnfoldVR))
+          VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
+        VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+        MII = MBB->insert(MII, FoldedMI);
+        InvalidateKills(MI, TRI, RegKills, KillOps);
+        VRM->RemoveMachineInstrFromMaps(&MI);
+        MBB->erase(&MI);
         MF.DeleteMachineInstr(NewMI);
+        return true;
       }
+      MF.DeleteMachineInstr(NewMI);
     }
+  }
 
+  return false;
+}
+
+/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and
+/// where SrcReg is r1 and it is tied to r0. Return true if after
+/// commuting this instruction it will be r0 = op r2, r1.
+static bool CommuteChangesDestination(MachineInstr *DefMI,
+                                      const TargetInstrDesc &TID,
+                                      unsigned SrcReg,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &DstIdx) {
+  if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+    return false;
+  if (!DefMI->getOperand(1).isReg() ||
+      DefMI->getOperand(1).getReg() != SrcReg)
+    return false;
+  unsigned DefIdx;
+  if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
     return false;
+  unsigned SrcIdx1, SrcIdx2;
+  if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+    return false;
+  if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+    DstIdx = 2;
+    return true;
   }
+  return false;
+}
+
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2<kill>
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalRewriter::
+CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                    unsigned VirtReg, unsigned SrcReg, int SS,
+                    AvailableSpills &Spills,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps,
+                    const TargetRegisterInfo *TRI) {
+  if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
+    return false;
 
-  /// CommuteChangesDestination - We are looking for r0 = op r1, r2 and
-  /// where SrcReg is r1 and it is tied to r0. Return true if after
-  /// commuting this instruction it will be r0 = op r2, r1.
-  static bool CommuteChangesDestination(MachineInstr *DefMI,
-                                        const TargetInstrDesc &TID,
-                                        unsigned SrcReg,
-                                        const TargetInstrInfo *TII,
-                                        unsigned &DstIdx) {
-    if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB->begin() &&
+      TID.isCommutable() &&
+      CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
       return false;
-    if (!DefMI->getOperand(1).isReg() ||
-        DefMI->getOperand(1).getReg() != SrcReg)
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
       return false;
-    unsigned DefIdx;
-    if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
       return false;
-    unsigned SrcIdx1, SrcIdx2;
-    if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+    unsigned DefIdx;
+    if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
       return false;
-    if (SrcIdx1 == 1 && SrcIdx2 == 2) {
-      DstIdx = 2;
-      return true;
-    }
-    return false;
-  }
+    assert(DefMI->getOperand(DefIdx).isReg() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
 
-  /// CommuteToFoldReload -
-  /// Look for
-  /// r1 = load fi#1
-  /// r1 = op r1, r2<kill>
-  /// store r1, fi#1
-  ///
-  /// If op is commutable and r2 is killed, then we can xform these to
-  /// r2 = op r2, fi#1
-  /// store r2, fi#1
-  bool CommuteToFoldReload(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MII,
-                           unsigned VirtReg, unsigned SrcReg, int SS,
-                           AvailableSpills &Spills,
-                           BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps,
-                           const TargetRegisterInfo *TRI,
-                           VirtRegMap &VRM) {
-    if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+    if (!CommutedMI)
+      return false;
+    SmallVector<unsigned, 1> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+    // Not needed since foldMemoryOperand returns new MI.
+    MF.DeleteMachineInstr(CommutedMI);
+    if (!FoldedMI)
       return false;
 
-    MachineFunction &MF = *MBB.getParent();
-    MachineInstr &MI = *MII;
-    MachineBasicBlock::iterator DefMII = prior(MII);
-    MachineInstr *DefMI = DefMII;
-    const TargetInstrDesc &TID = DefMI->getDesc();
-    unsigned NewDstIdx;
-    if (DefMII != MBB.begin() &&
-        TID.isCommutable() &&
-        CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
-      MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
-      unsigned NewReg = NewDstMO.getReg();
-      if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
-        return false;
-      MachineInstr *ReloadMI = prior(DefMII);
-      int FrameIdx;
-      unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
-      if (DestReg != SrcReg || FrameIdx != SS)
-        return false;
-      int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
-      if (UseIdx == -1)
-        return false;
-      unsigned DefIdx;
-      if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
-        return false;
-      assert(DefMI->getOperand(DefIdx).isReg() &&
-             DefMI->getOperand(DefIdx).getReg() == SrcReg);
+    VRM->addSpillSlotUse(SS, FoldedMI);
+    VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+    TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM->addSpillSlotUse(SS, StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = MBB->insert(MII, FoldedMI);  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(ReloadMI);
+    MBB->erase(ReloadMI);
+    InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(DefMI);
+    MBB->erase(DefMI);
+    InvalidateKills(MI, TRI, RegKills, KillOps);
+    VRM->RemoveMachineInstrFromMaps(&MI);
+    MBB->erase(&MI);
 
-      // Now commute def instruction.
-      MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
-      if (!CommutedMI)
-        return false;
-      SmallVector<unsigned, 1> Ops;
-      Ops.push_back(NewDstIdx);
-      MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
-      // Not needed since foldMemoryOperand returns new MI.
-      MF.DeleteMachineInstr(CommutedMI);
-      if (!FoldedMI)
-        return false;
+    // If NewReg was previously holding value of some SS, it's now clobbered.
+    // This has to be done now because it's a physical register. When this
+    // instruction is re-visited, it's ignored.
+    Spills.ClobberPhysReg(NewReg);
 
-      VRM.addSpillSlotUse(SS, FoldedMI);
-      VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
-      // Insert new def MI and spill MI.
-      const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-      TII->storeRegToStackSlot(MBB, &MI, NewReg, true, SS, RC);
-      MII = prior(MII);
-      MachineInstr *StoreMI = MII;
-      VRM.addSpillSlotUse(SS, StoreMI);
-      VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-      MII = MBB.insert(MII, FoldedMI);  // Update MII to backtrack.
-
-      // Delete all 3 old instructions.
-      InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(ReloadMI);
-      MBB.erase(ReloadMI);
-      InvalidateKills(*DefMI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(DefMI);
-      MBB.erase(DefMI);
-      InvalidateKills(MI, TRI, RegKills, KillOps);
-      VRM.RemoveMachineInstrFromMaps(&MI);
-      MBB.erase(&MI);
+    ++NumCommutes;
+    return true;
+  }
 
-      // If NewReg was previously holding value of some SS, it's now clobbered.
-      // This has to be done now because it's a physical register. When this
-      // instruction is re-visited, it's ignored.
-      Spills.ClobberPhysReg(NewReg);
+  return false;
+}
 
-      ++NumCommutes;
-      return true;
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                    int Idx, unsigned PhysReg, int StackSlot,
+                    const TargetRegisterClass *RC,
+                    bool isAvailable, MachineInstr *&LastStore,
+                    AvailableSpills &Spills,
+                    SmallSet<MachineInstr*, 4> &ReMatDefs,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
+  MachineInstr *StoreMI = prior(oldNextMII);
+  VRM->addSpillSlotUse(StackSlot, StoreMI);
+  DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+  // If there is a dead store to this stack slot, nuke it now.
+  if (LastStore) {
+    DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+    ++NumDSE;
+    SmallVector<unsigned, 2> KillRegs;
+    InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+    MachineBasicBlock::iterator PrevMII = LastStore;
+    bool CheckDef = PrevMII != MBB->begin();
+    if (CheckDef)
+      --PrevMII;
+    VRM->RemoveMachineInstrFromMaps(LastStore);
+    MBB->erase(LastStore);
+    if (CheckDef) {
+      // Look at defs of killed registers on the store. Mark the defs
+      // as dead since the store has been deleted and they aren't
+      // being reused.
+      for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+        bool HasOtherDef = false;
+        if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+          MachineInstr *DeadDef = PrevMII;
+          if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+            // FIXME: This assumes a remat def does not have side effects.
+            VRM->RemoveMachineInstrFromMaps(DeadDef);
+            MBB->erase(DeadDef);
+            ++NumDRM;
+          }
+        }
+      }
     }
+  }
+
+  // Allow for multi-instruction spill sequences, as on PPC Altivec.  Presume
+  // the last of multiple instructions is the actual store.
+  LastStore = prior(oldNextMII);
+
+  // If the stack slot value was previously available in some other
+  // register, change it now.  Otherwise, make the register available,
+  // in PhysReg.
+  Spills.ModifyStackSlotOrReMat(StackSlot);
+  Spills.ClobberPhysReg(PhysReg);
+  Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+  ++NumStores;
+}
 
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effect and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+      TID.isCall() || TID.isBarrier() || TID.isReturn() ||
+      TID.hasUnmodeledSideEffects())
     return false;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      // FIXME: We can't remove kill markers or else the scavenger will assert.
+      // An alternative is to add a ADD pseudo instruction to replace kill
+      // markers.
+      return false;
   }
+  return true;
+}
 
-  /// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
-  /// the last store to the same slot is now dead. If so, remove the last store.
-  void SpillRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator &MII,
-                           int Idx, unsigned PhysReg, int StackSlot,
-                           const TargetRegisterClass *RC,
-                           bool isAvailable, MachineInstr *&LastStore,
-                           AvailableSpills &Spills,
-                           SmallSet<MachineInstr*, 4> &ReMatDefs,
-                           BitVector &RegKills,
-                           std::vector<MachineOperand*> &KillOps,
-                           VirtRegMap &VRM) {
+/// TransferDeadness - An identity copy definition is dead and it's being
+/// removed. Find the last def or use and mark it as dead / kill.
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  SmallPtrSet<MachineInstr*, 4> Seens;
+  SmallVector<std::pair<MachineInstr*, int>,8> Refs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (Seens.insert(UDMI))
+      Refs.push_back(std::make_pair(UDMI, DI->second));
+  }
 
-    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
-    TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
-    MachineInstr *StoreMI = prior(oldNextMII);
-    VRM.addSpillSlotUse(StackSlot, StoreMI);
-    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+  if (Refs.empty())
+    return;
+  std::sort(Refs.begin(), Refs.end(), RefSorter());
 
-    // If there is a dead store to this stack slot, nuke it now.
-    if (LastStore) {
-      DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
-      ++NumDSE;
-      SmallVector<unsigned, 2> KillRegs;
-      InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
-      MachineBasicBlock::iterator PrevMII = LastStore;
-      bool CheckDef = PrevMII != MBB.begin();
-      if (CheckDef)
-        --PrevMII;
-      VRM.RemoveMachineInstrFromMaps(LastStore);
-      MBB.erase(LastStore);
-      if (CheckDef) {
-        // Look at defs of killed registers on the store. Mark the defs
-        // as dead since the store has been deleted and they aren't
-        // being reused.
-        for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
-          bool HasOtherDef = false;
-          if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
-            MachineInstr *DeadDef = PrevMII;
-            if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
-              // FIXME: This assumes a remat def does not have side effects.
-              VRM.RemoveMachineInstrFromMaps(DeadDef);
-              MBB.erase(DeadDef);
-              ++NumDRM;
-            }
-          }
-        }
+  while (!Refs.empty()) {
+    MachineInstr *LastUDMI = Refs.back().first;
+    Refs.pop_back();
+
+    MachineOperand *LastUD = NULL;
+    for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = LastUDMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() != Reg)
+        continue;
+      if (!LastUD || (LastUD->isUse() && MO.isDef()))
+        LastUD = &MO;
+      if (LastUDMI->isRegTiedToDefOperand(i))
+        break;
+    }
+    if (LastUD->isDef()) {
+      // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark it dead and we are done.
+      if (!isSafeToDelete(*LastUDMI)) {
+        LastUD->setIsDead();
+        break;
       }
+      VRM->RemoveMachineInstrFromMaps(LastUDMI);
+      MBB->erase(LastUDMI);
+    } else {
+      LastUD->setIsKill();
+      RegKills.set(Reg);
+      KillOps[Reg] = LastUD;
+      break;
     }
+  }
+}
+
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+  if (!VRM->hasEmergencySpills(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  SmallSet<int, 4> UsedSS;
+  std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+  for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+    unsigned PhysReg = EmSpills[i];
+    const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg);
+    assert(RC && "Unable to determine register class!");
+    int SS = VRM->getEmergencySpillSlot(RC);
+    if (UsedSS.count(SS))
+      llvm_unreachable("Need to spill more than one physical registers!");
+    UsedSS.insert(SS);
+    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC);
+    MachineInstr *StoreMI = prior(MII);
+    VRM->addSpillSlotUse(SS, StoreMI);
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+                       TII, *MBB->getParent());
+
+    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC);
 
-    // Allow for multi-instruction spill sequences, as on PPC Altivec.  Presume
-    // the last of multiple instructions is the actual store.
-    LastStore = prior(oldNextMII);
-
-    // If the stack slot value was previously available in some other
-    // register, change it now.  Otherwise, make the register available,
-    // in PhysReg.
-    Spills.ModifyStackSlotOrReMat(StackSlot);
-    Spills.ClobberPhysReg(PhysReg);
-    Spills.addAvailable(StackSlot, PhysReg, isAvailable);
-    ++NumStores;
+    MachineInstr *LoadMI = prior(InsertLoc);
+    VRM->addSpillSlotUse(SS, LoadMI);
+    ++NumPSpills;
+    DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
   }
+  return true;
+}
 
-  /// isSafeToDelete - Return true if this instruction doesn't produce any side
-  /// effect and all of its defs are dead.
-  static bool isSafeToDelete(MachineInstr &MI) {
-    const TargetInstrDesc &TID = MI.getDesc();
-    if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
-        TID.isCall() || TID.isBarrier() || TID.isReturn() ||
-        TID.hasUnmodeledSideEffects())
-      return false;
-    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-      MachineOperand &MO = MI.getOperand(i);
-      if (!MO.isReg() || !MO.getReg())
-        continue;
-      if (MO.isDef() && !MO.isDead())
-        return false;
-      if (MO.isUse() && MO.isKill())
-        // FIXME: We can't remove kill markers or else the scavenger will assert.
-        // An alternative is to add a ADD pseudo instruction to replace kill
-        // markers.
-        return false;
+/// InsertRestores - Restore registers before MI if requested by VRM. Return
+/// true if any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+                                   AvailableSpills &Spills,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  if (!VRM->isRestorePt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+  for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+    unsigned VirtReg = RestoreRegs[e-i-1];  // Reverse order.
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    unsigned Phys = VRM->getPhys(VirtReg);
+    MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored is available. If so, it must be
+    // from a predecessor BB that falls through into this BB. We do not
+    // expect:
+    // BB1:
+    // r1 = load fi#1
+    // ...
+    //    = r1<kill>
+    // ... # r1 not clobbered
+    // ...
+    //    = load fi#1
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+    unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+    if (InReg == Phys) {
+      // If the value is already available in the expected register, save
+      // a reload / remat.
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg <<" instead of reloading into physreg "
+                   << TRI->getName(Phys) << '\n');
+      ++NumOmitted;
+      continue;
+    } else if (InReg && InReg != Phys) {
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg <<" by copying it into physreg "
+                   << TRI->getName(Phys) << '\n');
+
+      // If the reloaded / remat value is available in another register,
+      // copy it to the desired register.
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                         *MBB->getParent());
+
+      TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC);
+
+      // This invalidates Phys.
+      Spills.ClobberPhysReg(Phys);
+      // Remember it's available.
+      Spills.addAvailable(SSorRMId, Phys);
+
+      // Mark it killed.
+      MachineInstr *CopyMI = prior(InsertLoc);
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
+      KillOpnd->setIsKill();
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      DEBUG(dbgs() << '\t' << *CopyMI);
+      ++NumCopified;
+      continue;
     }
-    return true;
-  }
 
-  /// TransferDeadness - A identity copy definition is dead and it's being
-  /// removed. Find the last def or use and mark it as dead / kill.
-  void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist,
-                        unsigned Reg, BitVector &RegKills,
-                        std::vector<MachineOperand*> &KillOps,
-                        VirtRegMap &VRM) {
-    SmallPtrSet<MachineInstr*, 4> Seens;
-    SmallVector<std::pair<MachineInstr*, int>,8> Refs;
-    for (MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(Reg),
-           RE = RegInfo->reg_end(); RI != RE; ++RI) {
-      MachineInstr *UDMI = &*RI;
-      if (UDMI->getParent() != MBB)
-        continue;
-      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
-      if (DI == DistanceMap.end() || DI->second > CurDist)
-        continue;
-      if (Seens.insert(UDMI))
-        Refs.push_back(std::make_pair(UDMI, DI->second));
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                       *MBB->getParent());
+
+    if (VRM->isReMaterialized(VirtReg)) {
+      ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+    } else {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC);
+      MachineInstr *LoadMI = prior(InsertLoc);
+      VRM->addSpillSlotUse(SSorRMId, LoadMI);
+      ++NumLoads;
+      DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
     }
 
-    if (Refs.empty())
-      return;
-    std::sort(Refs.begin(), Refs.end(), RefSorter());
+    // This invalidates Phys.
+    Spills.ClobberPhysReg(Phys);
+    // Remember it's available.
+    Spills.addAvailable(SSorRMId, Phys);
 
-    while (!Refs.empty()) {
-      MachineInstr *LastUDMI = Refs.back().first;
-      Refs.pop_back();
+    UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+    DEBUG(dbgs() << '\t' << *prior(MII));
+  }
+  return true;
+}
 
-      MachineOperand *LastUD = NULL;
-      for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = LastUDMI->getOperand(i);
-        if (!MO.isReg() || MO.getReg() != Reg)
-          continue;
-        if (!LastUD || (LastUD->isUse() && MO.isDef()))
-          LastUD = &MO;
-        if (LastUDMI->isRegTiedToDefOperand(i))
-          break;
-      }
-      if (LastUD->isDef()) {
-        // If the instruction has no side effect, delete it and propagate
-        // backward further. Otherwise, mark is dead and we are done.
-        if (!isSafeToDelete(*LastUDMI)) {
-          LastUD->setIsDead();
-          break;
-        }
-        VRM.RemoveMachineInstrFromMaps(LastUDMI);
-        MBB->erase(LastUDMI);
-      } else {
-        LastUD->setIsKill();
-        RegKills.set(Reg);
-        KillOps[Reg] = LastUD;
-        break;
-      }
-    }
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
+/// true if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+  if (!VRM->isSpillPt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<std::pair<unsigned,bool> > &SpillRegs =
+    VRM->getSpillPtSpills(MI);
+  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+    unsigned VirtReg = SpillRegs[i].first;
+    bool isKill = SpillRegs[i].second;
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    unsigned Phys = VRM->getPhys(VirtReg);
+    int StackSlot = VRM->getStackSlot(VirtReg);
+    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+    TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+                             RC);
+    MachineInstr *StoreMI = prior(oldNextMII);
+    VRM->addSpillSlotUse(StackSlot, StoreMI);
+    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
   }
+  return true;
+}
 
-  /// rewriteMBB - Keep track of which spills are available even after the
-  /// register allocator is done with them.  If possible, avid reloading vregs.
-  void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM,
-                  LiveIntervals *LIs,
-                  AvailableSpills &Spills, BitVector &RegKills,
-                  std::vector<MachineOperand*> &KillOps) {
 
-    DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
-          << MBB.getName() << "':\n");
-
-    MachineFunction &MF = *MBB.getParent();
-    
-    // MaybeDeadStores - When we need to write a value back into a stack slot,
-    // keep track of the inserted store.  If the stack slot value is never read
-    // (because the value was used from some available register, for example), and
-    // subsequently stored to, the original store is dead.  This map keeps track
-    // of inserted stores that are not used.  If we see a subsequent store to the
-    // same stack slot, the original store is deleted.
-    std::vector<MachineInstr*> MaybeDeadStores;
-    MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
-
-    // ReMatDefs - These are rematerializable def MIs which are not deleted.
-    SmallSet<MachineInstr*, 4> ReMatDefs;
-
-    // Clear kill info.
-    SmallSet<unsigned, 2> KilledMIRegs;
-    RegKills.reset();
-    KillOps.clear();
-    KillOps.resize(TRI->getNumRegs(), NULL);
-
-    unsigned Dist = 0;
-    DistanceMap.clear();
-    for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
-         MII != E; ) {
-      MachineBasicBlock::iterator NextMII = llvm::next(MII);
-
-      VirtRegMap::MI2VirtMapTy::const_iterator I, End;
-      bool Erased = false;
-      bool BackTracked = false;
-      if (OptimizeByUnfold(MBB, MII,
-                           MaybeDeadStores, Spills, RegKills, KillOps, VRM))
-        NextMII = llvm::next(MII);
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them.  If possible, avoid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+                          AvailableSpills &Spills, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
 
-      MachineInstr &MI = *MII;
-
-      if (VRM.hasEmergencySpills(&MI)) {
-        // Spill physical register(s) in the rare case the allocator has run out
-        // of registers to allocate.
-        SmallSet<int, 4> UsedSS;
-        std::vector<unsigned> &EmSpills = VRM.getEmergencySpills(&MI);
-        for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
-          unsigned PhysReg = EmSpills[i];
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(PhysReg);
-          assert(RC && "Unable to determine register class!");
-          int SS = VRM.getEmergencySpillSlot(RC);
-          if (UsedSS.count(SS))
-            llvm_unreachable("Need to spill more than one physical registers!");
-          UsedSS.insert(SS);
-          TII->storeRegToStackSlot(MBB, MII, PhysReg, true, SS, RC);
-          MachineInstr *StoreMI = prior(MII);
-          VRM.addSpillSlotUse(SS, StoreMI);
-
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false,
-                             SS, TII, MF);
-
-          TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
+  DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+               << MBB->getName() << "':\n");
 
-          MachineInstr *LoadMI = prior(InsertLoc);
-          VRM.addSpillSlotUse(SS, LoadMI);
-          ++NumPSpills;
-          DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-        }
-        NextMII = llvm::next(MII);
-      }
+  MachineFunction &MF = *MBB->getParent();
 
-      // Insert restores here if asked to.
-      if (VRM.isRestorePt(&MI)) {
-        std::vector<unsigned> &RestoreRegs = VRM.getRestorePtRestores(&MI);
-        for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
-          unsigned VirtReg = RestoreRegs[e-i-1];  // Reverse order.
-          if (!VRM.getPreSplitReg(VirtReg))
-            continue; // Split interval spilled again.
-          unsigned Phys = VRM.getPhys(VirtReg);
-          RegInfo->setPhysRegUsed(Phys);
-
-          // Check if the value being restored if available. If so, it must be
-          // from a predecessor BB that fallthrough into this BB. We do not
-          // expect:
-          // BB1:
-          // r1 = load fi#1
-          // ...
-          //    = r1<kill>
-          // ... # r1 not clobbered
-          // ...
-          //    = load fi#1
-          bool DoReMat = VRM.isReMaterialized(VirtReg);
-          int SSorRMId = DoReMat
-            ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-          if (InReg == Phys) {
-            // If the value is already available in the expected register, save
-            // a reload / remat.
-            if (SSorRMId)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(InReg) << " for vreg"
-                         << VirtReg <<" instead of reloading into physreg "
-                         << TRI->getName(Phys) << '\n');
-            ++NumOmitted;
-            continue;
-          } else if (InReg && InReg != Phys) {
-            if (SSorRMId)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(InReg) << " for vreg"
-                         << VirtReg <<" by copying it into physreg "
-                         << TRI->getName(Phys) << '\n');
-
-            // If the reloaded / remat value is available in another register,
-            // copy it to the desired register.
-
-            // Back-schedule reloads and remats.
-            MachineBasicBlock::iterator InsertLoc =
-              ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
-                               SSorRMId, TII, MF);
-
-            TII->copyRegToReg(MBB, InsertLoc, Phys, InReg, RC, RC);
-
-            // This invalidates Phys.
-            Spills.ClobberPhysReg(Phys);
-            // Remember it's available.
-            Spills.addAvailable(SSorRMId, Phys);
-
-            // Mark is killed.
-            MachineInstr *CopyMI = prior(InsertLoc);
-            CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-            MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
-            KillOpnd->setIsKill();
-            UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
-            DEBUG(dbgs() << '\t' << *CopyMI);
-            ++NumCopified;
-            continue;
-          }
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store.  If the stack slot value is never read
+  // (because the value was used from some available register, for example), and
+  // subsequently stored to, the original store is dead.  This map keeps track
+  // of inserted stores that are not used.  If we see a subsequent store to the
+  // same stack slot, the original store is deleted.
+  std::vector<MachineInstr*> MaybeDeadStores;
+  MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
 
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(MII, MBB.begin(), Phys, TRI, DoReMat,
-                             SSorRMId, TII, MF);
-
-          if (VRM.isReMaterialized(VirtReg)) {
-            ReMaterialize(MBB, InsertLoc, Phys, VirtReg, TII, TRI, VRM);
-          } else {
-            const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-            TII->loadRegFromStackSlot(MBB, InsertLoc, Phys, SSorRMId, RC);
-            MachineInstr *LoadMI = prior(InsertLoc);
-            VRM.addSpillSlotUse(SSorRMId, LoadMI);
-            ++NumLoads;
-            DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-          }
+  // ReMatDefs - These are rematerializable def MIs which are not deleted.
+  SmallSet<MachineInstr*, 4> ReMatDefs;
 
-          // This invalidates Phys.
-          Spills.ClobberPhysReg(Phys);
-          // Remember it's available.
-          Spills.addAvailable(SSorRMId, Phys);
+  // Clear kill info.
+  SmallSet<unsigned, 2> KilledMIRegs;
+  RegKills.reset();
+  KillOps.clear();
+  KillOps.resize(TRI->getNumRegs(), NULL);
 
-          UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(dbgs() << '\t' << *prior(MII));
-        }
-      }
+  DistanceMap.clear();
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E; ) {
+    MachineBasicBlock::iterator NextMII = llvm::next(MII);
 
-      // Insert spills here if asked to.
-      if (VRM.isSpillPt(&MI)) {
-        std::vector<std::pair<unsigned,bool> > &SpillRegs =
-          VRM.getSpillPtSpills(&MI);
-        for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
-          unsigned VirtReg = SpillRegs[i].first;
-          bool isKill = SpillRegs[i].second;
-          if (!VRM.getPreSplitReg(VirtReg))
-            continue; // Split interval spilled again.
-          const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-          unsigned Phys = VRM.getPhys(VirtReg);
-          int StackSlot = VRM.getStackSlot(VirtReg);
-          MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
-          TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
-          MachineInstr *StoreMI = prior(oldNextMII);
-          VRM.addSpillSlotUse(StackSlot, StoreMI);
-          DEBUG(dbgs() << "Store:\t" << *StoreMI);
-          VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-        }
-        NextMII = llvm::next(MII);
-      }
+    if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+      NextMII = llvm::next(MII);
 
-      /// ReusedOperands - Keep track of operand reuse in case we need to undo
-      /// reuse.
-      ReuseInfo ReusedOperands(MI, TRI);
-      SmallVector<unsigned, 4> VirtUseOps;
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI.getOperand(i);
-        if (!MO.isReg() || MO.getReg() == 0)
-          continue;   // Ignore non-register operands.
-        
-        unsigned VirtReg = MO.getReg();
-        if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
-          // Ignore physregs for spilling, but remember that it is used by this
-          // function.
-          RegInfo->setPhysRegUsed(VirtReg);
-          continue;
-        }
+    if (InsertEmergencySpills(MII))
+      NextMII = llvm::next(MII);
+
+    InsertRestores(MII, Spills, RegKills, KillOps);
+
+    if (InsertSpills(MII))
+      NextMII = llvm::next(MII);
+
+    VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+    bool Erased = false;
+    bool BackTracked = false;
+    MachineInstr &MI = *MII;
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, TRI);
+    SmallVector<unsigned, 4> VirtUseOps;
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue;   // Ignore non-register operands.
 
-        // We want to process implicit virtual register uses first.
-        if (MO.isImplicit())
-          // If the virtual register is implicitly defined, emit a implicit_def
-          // before so scavenger knows it's "defined".
-          // FIXME: This is a horrible hack done the by register allocator to
-          // remat a definition with virtual register operand.
-          VirtUseOps.insert(VirtUseOps.begin(), i);
-        else
-          VirtUseOps.push_back(i);
+      unsigned VirtReg = MO.getReg();
+      if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+        // Ignore physregs for spilling, but remember that it is used by this
+        // function.
+        MRI->setPhysRegUsed(VirtReg);
+        continue;
       }
 
-      // Process all of the spilled uses and all non spilled reg references.
-      SmallVector<int, 2> PotentialDeadStoreSlots;
-      KilledMIRegs.clear();
-      for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
-        unsigned i = VirtUseOps[j];
-        unsigned VirtReg = MI.getOperand(i).getReg();
-        assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
-               "Not a virtual register?");
-
-        unsigned SubIdx = MI.getOperand(i).getSubReg();
-        if (VRM.isAssignedReg(VirtReg)) {
-          // This virtual register was assigned a physreg!
-          unsigned Phys = VRM.getPhys(VirtReg);
-          RegInfo->setPhysRegUsed(Phys);
-          if (MI.getOperand(i).isDef())
-            ReusedOperands.markClobbered(Phys);
-          substitutePhysReg(MI.getOperand(i), Phys, *TRI);
-          if (VRM.isImplicitlyDefined(VirtReg))
-            // FIXME: Is this needed?
-            BuildMI(MBB, &MI, MI.getDebugLoc(),
-                    TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
-          continue;
-        }
+      // We want to process implicit virtual register uses first.
+      if (MO.isImplicit())
+        // If the virtual register is implicitly defined, emit an implicit_def
+        // before so scavenger knows it's "defined".
+        // FIXME: This is a horrible hack done by the register allocator to
+        // remat a definition with virtual register operand.
+        VirtUseOps.insert(VirtUseOps.begin(), i);
+      else
+        VirtUseOps.push_back(i);
+    }
 
-        // This virtual register is now known to be a spilled value.
-        if (!MI.getOperand(i).isUse())
-          continue;  // Handle defs in the loop below (handle use&def here though)
-
-        bool AvoidReload = MI.getOperand(i).isUndef();
-        // Check if it is defined by an implicit def. It should not be spilled.
-        // Note, this is for correctness reason. e.g.
-        // 8   %reg1024<def> = IMPLICIT_DEF
-        // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
-        // The live range [12, 14) are not part of the r1024 live interval since
-        // it's defined by an implicit def. It will not conflicts with live
-        // interval of r1025. Now suppose both registers are spilled, you can
-        // easily see a situation where both registers are reloaded before
-        // the INSERT_SUBREG and both target registers that would overlap.
-        bool DoReMat = VRM.isReMaterialized(VirtReg);
-        int SSorRMId = DoReMat
-          ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg);
-        int ReuseSlot = SSorRMId;
-
-        // Check to see if this stack slot is available.
-        unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
-        // If this is a sub-register use, make sure the reuse register is in the
-        // right register class. For example, for x86 not all of the 32-bit
-        // registers have accessible sub-registers.
-        // Similarly so for EXTRACT_SUBREG. Consider this:
-        // EDI = op
-        // MOV32_mr fi#1, EDI
-        // ...
-        //       = EXTRACT_SUBREG fi#1
-        // fi#1 is available in EDI, but it cannot be reused because it's not in
-        // the right register file.
-        if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          if (!RC->contains(PhysReg))
-            PhysReg = 0;
-        }
+    // Process all of the spilled uses and all non spilled reg references.
+    SmallVector<int, 2> PotentialDeadStoreSlots;
+    KilledMIRegs.clear();
+    for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+      unsigned i = VirtUseOps[j];
+      unsigned VirtReg = MI.getOperand(i).getReg();
+      assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+             "Not a virtual register?");
+
+      unsigned SubIdx = MI.getOperand(i).getSubReg();
+      if (VRM->isAssignedReg(VirtReg)) {
+        // This virtual register was assigned a physreg!
+        unsigned Phys = VRM->getPhys(VirtReg);
+        MRI->setPhysRegUsed(Phys);
+        if (MI.getOperand(i).isDef())
+          ReusedOperands.markClobbered(Phys);
+        substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+        if (VRM->isImplicitlyDefined(VirtReg))
+          // FIXME: Is this needed?
+          BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                  TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+        continue;
+      }
 
-        if (PhysReg && !AvoidReload) {
-          // This spilled operand might be part of a two-address operand.  If this
-          // is the case, then changing it will necessarily require changing the 
-          // def part of the instruction as well.  However, in some cases, we
-          // aren't allowed to modify the reused register.  If none of these cases
-          // apply, reuse it.
-          bool CanReuse = true;
-          bool isTied = MI.isRegTiedToDefOperand(i);
-          if (isTied) {
-            // Okay, we have a two address operand.  We can reuse this physreg as
-            // long as we are allowed to clobber the value and there isn't an
-            // earlier def that has already clobbered the physreg.
-            CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
-              Spills.canClobberPhysReg(PhysReg);
-          }
-          
-          if (CanReuse) {
-            // If this stack slot value is already available, reuse it!
-            if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(dbgs() << "Reusing RM#"
-                           << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(dbgs() << " from physreg "
-                         << TRI->getName(PhysReg) << " for vreg"
-                         << VirtReg <<" instead of reloading into physreg "
-                         << TRI->getName(VRM.getPhys(VirtReg)) << '\n');
-            unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-            MI.getOperand(i).setReg(RReg);
-            MI.getOperand(i).setSubReg(0);
-
-            // The only technical detail we have is that we don't know that
-            // PhysReg won't be clobbered by a reloaded stack slot that occurs
-            // later in the instruction.  In particular, consider 'op V1, V2'.
-            // If V1 is available in physreg R0, we would choose to reuse it
-            // here, instead of reloading it into the register the allocator
-            // indicated (say R1).  However, V2 might have to be reloaded
-            // later, and it might indicate that it needs to live in R0.  When
-            // this occurs, we need to have information available that
-            // indicates it is safe to use R1 for the reload instead of R0.
-            //
-            // To further complicate matters, we might conflict with an alias,
-            // or R0 and R1 might not be compatible with each other.  In this
-            // case, we actually insert a reload for V1 in R1, ensuring that
-            // we can get at R0 or its alias.
-            ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
-                                    VRM.getPhys(VirtReg), VirtReg);
-            if (isTied)
-              // Only mark it clobbered if this is a use&def operand.
-              ReusedOperands.markClobbered(PhysReg);
-            ++NumReused;
-
-            if (MI.getOperand(i).isKill() &&
-                ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
-              // The store of this spilled value is potentially dead, but we
-              // won't know for certain until we've confirmed that the re-use
-              // above is valid, which means waiting until the other operands
-              // are processed. For now we just track the spill slot, we'll
-              // remove it after the other operands are processed if valid.
-
-              PotentialDeadStoreSlots.push_back(ReuseSlot);
-            }
+      // This virtual register is now known to be a spilled value.
+      if (!MI.getOperand(i).isUse())
+        continue;  // Handle defs in the loop below (handle use&def here though)
+
+      bool AvoidReload = MI.getOperand(i).isUndef();
+      // Check if it is defined by an implicit def. It should not be spilled.
+      // Note, this is for correctness reason. e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled, you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      int SSorRMId = DoReMat
+        ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+      int ReuseSlot = SSorRMId;
+
+      // Check to see if this stack slot is available.
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+      // If this is a sub-register use, make sure the reuse register is in the
+      // right register class. For example, for x86 not all of the 32-bit
+      // registers have accessible sub-registers.
+      // Similarly so for EXTRACT_SUBREG. Consider this:
+      // EDI = op
+      // MOV32_mr fi#1, EDI
+      // ...
+      //       = EXTRACT_SUBREG fi#1
+      // fi#1 is available in EDI, but it cannot be reused because it's not in
+      // the right register file.
+      if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        if (!RC->contains(PhysReg))
+          PhysReg = 0;
+      }
 
-            // Mark is isKill if it's there no other uses of the same virtual
-            // register and it's not a two-address operand. IsKill will be
-            // unset if reg is reused.
-            if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
-              MI.getOperand(i).setIsKill();
-              KilledMIRegs.insert(VirtReg);
-            }
+      if (PhysReg && !AvoidReload) {
+        // This spilled operand might be part of a two-address operand.  If this
+        // is the case, then changing it will necessarily require changing the
+        // def part of the instruction as well.  However, in some cases, we
+        // aren't allowed to modify the reused register.  If none of these cases
+        // apply, reuse it.
+        bool CanReuse = true;
+        bool isTied = MI.isRegTiedToDefOperand(i);
+        if (isTied) {
+          // Okay, we have a two address operand.  We can reuse this physreg as
+          // long as we are allowed to clobber the value and there isn't an
+          // earlier def that has already clobbered the physreg.
+          CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+            Spills.canClobberPhysReg(PhysReg);
+        }
 
-            continue;
-          }  // CanReuse
-          
-          // Otherwise we have a situation where we have a two-address instruction
-          // whose mod/ref operand needs to be reloaded.  This reload is already
-          // available in some register "PhysReg", but if we used PhysReg as the
-          // operand to our 2-addr instruction, the instruction would modify
-          // PhysReg.  This isn't cool if something later uses PhysReg and expects
-          // to get its initial value.
-          //
-          // To avoid this problem, and to avoid doing a load right after a store,
-          // we emit a copy from PhysReg into the designated register for this
-          // operand.
-          unsigned DesignatedReg = VRM.getPhys(VirtReg);
-          assert(DesignatedReg && "Must map virtreg to physreg!");
-
-          // Note that, if we reused a register for a previous operand, the
-          // register we want to reload into might not actually be
-          // available.  If this occurs, use the register indicated by the
-          // reuser.
-          if (ReusedOperands.hasReuses())
-            DesignatedReg = ReusedOperands.GetRegForReload(VirtReg,
-                                                           DesignatedReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-          
-          // If the mapped designated register is actually the physreg we have
-          // incoming, we don't need to inserted a dead copy.
-          if (DesignatedReg == PhysReg) {
-            // If this stack slot value is already available, reuse it!
-            if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(dbgs() << "Reusing RM#"
-                    << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
-            else
-              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
-                         << " for vreg" << VirtReg
-                         << " instead of reloading into same physreg.\n");
-            unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-            MI.getOperand(i).setReg(RReg);
-            MI.getOperand(i).setSubReg(0);
-            ReusedOperands.markClobbered(RReg);
-            ++NumReused;
-            continue;
-          }
-          
-          const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-          RegInfo->setPhysRegUsed(DesignatedReg);
-          ReusedOperands.markClobbered(DesignatedReg);
-
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(&MI, MBB.begin(), PhysReg, TRI, DoReMat,
-                             SSorRMId, TII, MF);
-
-          TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
-
-          MachineInstr *CopyMI = prior(InsertLoc);
-          CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-          UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
-          // This invalidates DesignatedReg.
-          Spills.ClobberPhysReg(DesignatedReg);
-          
-          Spills.addAvailable(ReuseSlot, DesignatedReg);
-          unsigned RReg =
-            SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+        if (CanReuse) {
+          // If this stack slot value is already available, reuse it!
+          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+            DEBUG(dbgs() << "Reusing RM#"
+                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+          else
+            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+          DEBUG(dbgs() << " from physreg "
+                << TRI->getName(PhysReg) << " for vreg"
+                << VirtReg <<" instead of reloading into physreg "
+                << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
           MI.getOperand(i).setReg(RReg);
           MI.getOperand(i).setSubReg(0);
-          DEBUG(dbgs() << '\t' << *prior(MII));
+
+          // The only technical detail we have is that we don't know that
+          // PhysReg won't be clobbered by a reloaded stack slot that occurs
+          // later in the instruction.  In particular, consider 'op V1, V2'.
+          // If V1 is available in physreg R0, we would choose to reuse it
+          // here, instead of reloading it into the register the allocator
+          // indicated (say R1).  However, V2 might have to be reloaded
+          // later, and it might indicate that it needs to live in R0.  When
+          // this occurs, we need to have information available that
+          // indicates it is safe to use R1 for the reload instead of R0.
+          //
+          // To further complicate matters, we might conflict with an alias,
+          // or R0 and R1 might not be compatible with each other.  In this
+          // case, we actually insert a reload for V1 in R1, ensuring that
+          // we can get at R0 or its alias.
+          ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+                                  VRM->getPhys(VirtReg), VirtReg);
+          if (isTied)
+            // Only mark it clobbered if this is a use&def operand.
+            ReusedOperands.markClobbered(PhysReg);
           ++NumReused;
+
+          if (MI.getOperand(i).isKill() &&
+              ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+            // The store of this spilled value is potentially dead, but we
+            // won't know for certain until we've confirmed that the re-use
+            // above is valid, which means waiting until the other operands
+            // are processed. For now we just track the spill slot, we'll
+            // remove it after the other operands are processed if valid.
+
+            PotentialDeadStoreSlots.push_back(ReuseSlot);
+          }
+
+          // Mark it isKill if there are no other uses of the same virtual
+          // register and it's not a two-address operand. IsKill will be
+          // unset if reg is reused.
+          if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+            MI.getOperand(i).setIsKill();
+            KilledMIRegs.insert(VirtReg);
+          }
+
           continue;
-        } // if (PhysReg)
-        
-        // Otherwise, reload it and remember that we have it.
-        PhysReg = VRM.getPhys(VirtReg);
-        assert(PhysReg && "Must map virtreg to physreg!");
+        }  // CanReuse
+
+        // Otherwise we have a situation where we have a two-address instruction
+        // whose mod/ref operand needs to be reloaded.  This reload is already
+        // available in some register "PhysReg", but if we used PhysReg as the
+        // operand to our 2-addr instruction, the instruction would modify
+        // PhysReg.  This isn't cool if something later uses PhysReg and expects
+        // to get its initial value.
+        //
+        // To avoid this problem, and to avoid doing a load right after a store,
+        // we emit a copy from PhysReg into the designated register for this
+        // operand.
+        unsigned DesignatedReg = VRM->getPhys(VirtReg);
+        assert(DesignatedReg && "Must map virtreg to physreg!");
 
         // Note that, if we reused a register for a previous operand, the
         // register we want to reload into might not actually be
         // available.  If this occurs, use the register indicated by the
         // reuser.
         if (ReusedOperands.hasReuses())
-          PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-        
-        RegInfo->setPhysRegUsed(PhysReg);
-        ReusedOperands.markClobbered(PhysReg);
-        if (AvoidReload)
-          ++NumAvoided;
-        else {
-          // Back-schedule reloads and remats.
-          MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(MII, MBB.begin(), PhysReg, TRI, DoReMat,
-                             SSorRMId, TII, MF);
-
-          if (DoReMat) {
-            ReMaterialize(MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, VRM);
-          } else {
-            const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg);
-            TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SSorRMId, RC);
-            MachineInstr *LoadMI = prior(InsertLoc);
-            VRM.addSpillSlotUse(SSorRMId, LoadMI);
-            ++NumLoads;
-            DistanceMap.insert(std::make_pair(LoadMI, Dist++));
-          }
-          // This invalidates PhysReg.
-          Spills.ClobberPhysReg(PhysReg);
+          DesignatedReg = ReusedOperands.
+            GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+                            MaybeDeadStores, RegKills, KillOps, *VRM);
+
+        // If the mapped designated register is actually the physreg we have
+        // incoming, we don't need to insert a dead copy.
+        if (DesignatedReg == PhysReg) {
+          // If this stack slot value is already available, reuse it!
+          if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+            DEBUG(dbgs() << "Reusing RM#"
+                  << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+          else
+            DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+          DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+                << " for vreg" << VirtReg
+                << " instead of reloading into same physreg.\n");
+          unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+          MI.getOperand(i).setReg(RReg);
+          MI.getOperand(i).setSubReg(0);
+          ReusedOperands.markClobbered(RReg);
+          ++NumReused;
+          continue;
+        }
 
-          // Any stores to this stack slot are not dead anymore.
-          if (!DoReMat)
-            MaybeDeadStores[SSorRMId] = NULL;
-          Spills.addAvailable(SSorRMId, PhysReg);
-          // Assumes this is the last use. IsKill will be unset if reg is reused
-          // unless it's a two-address operand.
-          if (!MI.isRegTiedToDefOperand(i) &&
-              KilledMIRegs.count(VirtReg) == 0) {
-            MI.getOperand(i).setIsKill();
-            KilledMIRegs.insert(VirtReg);
-          }
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        MRI->setPhysRegUsed(DesignatedReg);
+        ReusedOperands.markClobbered(DesignatedReg);
 
-          UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-        }
-        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+        // Back-schedule reloads and remats.
+        MachineBasicBlock::iterator InsertLoc =
+          ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                           SSorRMId, TII, MF);
+
+        TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+
+        MachineInstr *CopyMI = prior(InsertLoc);
+        CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+        UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+        // This invalidates DesignatedReg.
+        Spills.ClobberPhysReg(DesignatedReg);
+
+        Spills.addAvailable(ReuseSlot, DesignatedReg);
+        unsigned RReg =
+          SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
         MI.getOperand(i).setReg(RReg);
         MI.getOperand(i).setSubReg(0);
-      }
+        DEBUG(dbgs() << '\t' << *prior(MII));
+        ++NumReused;
+        continue;
+      } // if (PhysReg)
 
-      // Ok - now we can remove stores that have been confirmed dead.
-      for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
-        // This was the last use and the spilled value is still available
-        // for reuse. That means the spill was unnecessary!
-        int PDSSlot = PotentialDeadStoreSlots[j];
-        MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
-        if (DeadStore) {
-          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
-          InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-          VRM.RemoveMachineInstrFromMaps(DeadStore);
-          MBB.erase(DeadStore);
-          MaybeDeadStores[PDSSlot] = NULL;
-          ++NumDSE;
+        // Otherwise, reload it and remember that we have it.
+      PhysReg = VRM->getPhys(VirtReg);
+      assert(PhysReg && "Must map virtreg to physreg!");
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available.  If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                    Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+      MRI->setPhysRegUsed(PhysReg);
+      ReusedOperands.markClobbered(PhysReg);
+      if (AvoidReload)
+        ++NumAvoided;
+      else {
+        // Back-schedule reloads and remats.
+        MachineBasicBlock::iterator InsertLoc =
+          ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
+                           SSorRMId, TII, MF);
+
+        if (DoReMat) {
+          ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+        } else {
+          const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+          TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC);
+          MachineInstr *LoadMI = prior(InsertLoc);
+          VRM->addSpillSlotUse(SSorRMId, LoadMI);
+          ++NumLoads;
+          DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
         }
+        // This invalidates PhysReg.
+        Spills.ClobberPhysReg(PhysReg);
+
+        // Any stores to this stack slot are not dead anymore.
+        if (!DoReMat)
+          MaybeDeadStores[SSorRMId] = NULL;
+        Spills.addAvailable(SSorRMId, PhysReg);
+        // Assumes this is the last use. IsKill will be unset if reg is reused
+        // unless it's a two-address operand.
+        if (!MI.isRegTiedToDefOperand(i) &&
+            KilledMIRegs.count(VirtReg) == 0) {
+          MI.getOperand(i).setIsKill();
+          KilledMIRegs.insert(VirtReg);
+        }
+
+        UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+        DEBUG(dbgs() << '\t' << *prior(InsertLoc));
       }
+      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+    }
 
+    // Ok - now we can remove stores that have been confirmed dead.
+    for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+      // This was the last use and the spilled value is still available
+      // for reuse. That means the spill was unnecessary!
+      int PDSSlot = PotentialDeadStoreSlots[j];
+      MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+      if (DeadStore) {
+        DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+        InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+        VRM->RemoveMachineInstrFromMaps(DeadStore);
+        MBB->erase(DeadStore);
+        MaybeDeadStores[PDSSlot] = NULL;
+        ++NumDSE;
+      }
+    }
 
-      DEBUG(dbgs() << '\t' << MI);
 
+    DEBUG(dbgs() << '\t' << MI);
 
-      // If we have folded references to memory operands, make sure we clear all
-      // physical registers that may contain the value of the spilled virtual
-      // register
-      SmallSet<int, 2> FoldedSS;
-      for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
-        unsigned VirtReg = I->second.first;
-        VirtRegMap::ModRef MR = I->second.second;
-        DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
 
-        // MI2VirtMap be can updated which invalidate the iterator.
-        // Increment the iterator first.
-        ++I;
-        int SS = VRM.getStackSlot(VirtReg);
-        if (SS == VirtRegMap::NO_STACK_SLOT)
-          continue;
-        FoldedSS.insert(SS);
-        DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
-        
-        // If this folded instruction is just a use, check to see if it's a
-        // straight load from the virt reg slot.
-        if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
-          int FrameIdx;
-          unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
-          if (DestReg && FrameIdx == SS) {
-            // If this spill slot is available, turn it into a copy (or nothing)
-            // instead of leaving it as a load!
-            if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
-              DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
-              if (DestReg != InReg) {
-                const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-                TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
-                MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
-                unsigned SubIdx = DefMO->getSubReg();
-                // Revisit the copy so we make sure to notice the effects of the
-                // operation on the destreg (either needing to RA it if it's 
-                // virtual or needing to clobber any values if it's physical).
-                NextMII = &MI;
-                --NextMII;  // backtrack to the copy.
-                NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
-                // Propagate the sub-register index over.
-                if (SubIdx) {
-                  DefMO = NextMII->findRegisterDefOperand(DestReg);
-                  DefMO->setSubReg(SubIdx);
-                }
-
-                // Mark is killed.
-                MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
-                KillOpnd->setIsKill();
-
-                BackTracked = true;
-              } else {
-                DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-                // Unset last kill since it's being reused.
-                InvalidateKill(InReg, TRI, RegKills, KillOps);
-                Spills.disallowClobberPhysReg(InReg);
+    // If we have folded references to memory operands, make sure we clear all
+    // physical registers that may contain the value of the spilled virtual
+    // register
+    SmallSet<int, 2> FoldedSS;
+    for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+      unsigned VirtReg = I->second.first;
+      VirtRegMap::ModRef MR = I->second.second;
+      DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
+
+      // MI2VirtMap be can updated which invalidate the iterator.
+      // Increment the iterator first.
+      ++I;
+      int SS = VRM->getStackSlot(VirtReg);
+      if (SS == VirtRegMap::NO_STACK_SLOT)
+        continue;
+      FoldedSS.insert(SS);
+      DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+      // If this folded instruction is just a use, check to see if it's a
+      // straight load from the virt reg slot.
+      if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+        int FrameIdx;
+        unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+        if (DestReg && FrameIdx == SS) {
+          // If this spill slot is available, turn it into a copy (or nothing)
+          // instead of leaving it as a load!
+          if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+            DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+            if (DestReg != InReg) {
+              const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+              TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC);
+              MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+              unsigned SubIdx = DefMO->getSubReg();
+              // Revisit the copy so we make sure to notice the effects of the
+              // operation on the destreg (either needing to RA it if it's
+              // virtual or needing to clobber any values if it's physical).
+              NextMII = &MI;
+              --NextMII;  // backtrack to the copy.
+              NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+              // Propagate the sub-register index over.
+              if (SubIdx) {
+                DefMO = NextMII->findRegisterDefOperand(DestReg);
+                DefMO->setSubReg(SubIdx);
               }
 
-              InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
-              Erased = true;
-              goto ProcessNextInst;
+              // Mark is killed.
+              MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
+              KillOpnd->setIsKill();
+
+              BackTracked = true;
+            } else {
+              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+              // Unset last kill since it's being reused.
+              InvalidateKill(InReg, TRI, RegKills, KillOps);
+              Spills.disallowClobberPhysReg(InReg);
             }
-          } else {
-            unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-            SmallVector<MachineInstr*, 4> NewMIs;
-            if (PhysReg &&
-                TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
-              MBB.insert(MII, NewMIs[0]);
+
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
+            Erased = true;
+            goto ProcessNextInst;
+          }
+        } else {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          if (PhysReg &&
+              TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+            MBB->insert(MII, NewMIs[0]);
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
+            Erased = true;
+            --NextMII;  // backtrack to the unfolded instruction.
+            BackTracked = true;
+            goto ProcessNextInst;
+          }
+        }
+      }
+
+      // If this reference is not a use, any previous store is now dead.
+      // Otherwise, the store to this stack slot is not dead anymore.
+      MachineInstr* DeadStore = MaybeDeadStores[SS];
+      if (DeadStore) {
+        bool isDead = !(MR & VirtRegMap::isRef);
+        MachineInstr *NewStore = NULL;
+        if (MR & VirtRegMap::isModRef) {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          // We can reuse this physreg as long as we are allowed to clobber
+          // the value and there isn't an earlier def that has already clobbered
+          // the physreg.
+          if (PhysReg &&
+              !ReusedOperands.isClobbered(PhysReg) &&
+              Spills.canClobberPhysReg(PhysReg) &&
+              !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+            MachineOperand *KillOpnd =
+              DeadStore->findRegisterUseOperand(PhysReg, true);
+            // Note, if the store is storing a sub-register, it's possible the
+            // super-register is needed below.
+            if (KillOpnd && !KillOpnd->getSubReg() &&
+                TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
+              MBB->insert(MII, NewMIs[0]);
+              NewStore = NewMIs[1];
+              MBB->insert(MII, NewStore);
+              VRM->addSpillSlotUse(SS, NewStore);
               InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
+              VRM->RemoveMachineInstrFromMaps(&MI);
+              MBB->erase(&MI);
               Erased = true;
+              --NextMII;
               --NextMII;  // backtrack to the unfolded instruction.
               BackTracked = true;
-              goto ProcessNextInst;
+              isDead = true;
+              ++NumSUnfold;
             }
           }
         }
 
-        // If this reference is not a use, any previous store is now dead.
-        // Otherwise, the store to this stack slot is not dead anymore.
-        MachineInstr* DeadStore = MaybeDeadStores[SS];
-        if (DeadStore) {
-          bool isDead = !(MR & VirtRegMap::isRef);
-          MachineInstr *NewStore = NULL;
-          if (MR & VirtRegMap::isModRef) {
-            unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
-            SmallVector<MachineInstr*, 4> NewMIs;
-            // We can reuse this physreg as long as we are allowed to clobber
-            // the value and there isn't an earlier def that has already clobbered
-            // the physreg.
-            if (PhysReg &&
-                !ReusedOperands.isClobbered(PhysReg) &&
-                Spills.canClobberPhysReg(PhysReg) &&
-                !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
-              MachineOperand *KillOpnd =
-                DeadStore->findRegisterUseOperand(PhysReg, true);
-              // Note, if the store is storing a sub-register, it's possible the
-              // super-register is needed below.
-              if (KillOpnd && !KillOpnd->getSubReg() &&
-                  TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
-                MBB.insert(MII, NewMIs[0]);
-                NewStore = NewMIs[1];
-                MBB.insert(MII, NewStore);
-                VRM.addSpillSlotUse(SS, NewStore);
-                InvalidateKills(MI, TRI, RegKills, KillOps);
-                VRM.RemoveMachineInstrFromMaps(&MI);
-                MBB.erase(&MI);
-                Erased = true;
-                --NextMII;
-                --NextMII;  // backtrack to the unfolded instruction.
-                BackTracked = true;
-                isDead = true;
-                ++NumSUnfold;
-              }
+        if (isDead) {  // Previous store is dead.
+          // If we get here, the store is dead, nuke it now.
+          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+          InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+          VRM->RemoveMachineInstrFromMaps(DeadStore);
+          MBB->erase(DeadStore);
+          if (!NewStore)
+            ++NumDSE;
+        }
+
+        MaybeDeadStores[SS] = NULL;
+        if (NewStore) {
+          // Treat this store as a spill merged into a copy. That makes the
+          // stack slot value available.
+          VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+          goto ProcessNextInst;
+        }
+      }
+
+      // If the spill slot value is available, and this is a new definition of
+      // the value, the value is not available anymore.
+      if (MR & VirtRegMap::isMod) {
+        // Notice that the value in this stack slot has been modified.
+        Spills.ModifyStackSlotOrReMat(SS);
+
+        // If this is *just* a mod of the value, check to see if this is just a
+        // store to the spill slot (i.e. the spill got merged into the copy). If
+        // so, realize that the vreg is available now, and add the store to the
+        // MaybeDeadStore info.
+        int StackSlot;
+        if (!(MR & VirtRegMap::isRef)) {
+          if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+            assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+                   "Src hasn't been allocated yet?");
+
+            if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
+                                    Spills, RegKills, KillOps, TRI)) {
+              NextMII = llvm::next(MII);
+              BackTracked = true;
+              goto ProcessNextInst;
             }
-          }
 
-          if (isDead) {  // Previous store is dead.
-            // If we get here, the store is dead, nuke it now.
-            DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
-            InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
-            VRM.RemoveMachineInstrFromMaps(DeadStore);
-            MBB.erase(DeadStore);
-            if (!NewStore)
-              ++NumDSE;
+            // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
+            // this as a potentially dead store in case there is a subsequent
+            // store into the stack slot without a read from it.
+            MaybeDeadStores[StackSlot] = &MI;
+
+            // If the stack slot value was previously available in some other
+            // register, change it now.  Otherwise, make the register
+            // available in PhysReg.
+            Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
           }
+        }
+      }
+    }
 
-          MaybeDeadStores[SS] = NULL;
-          if (NewStore) {
-            // Treat this store as a spill merged into a copy. That makes the
-            // stack slot value available.
-            VRM.virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
-            goto ProcessNextInst;
+    // Process all of the spilled defs.
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+        continue;
+
+      unsigned VirtReg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef", if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuses the scavenger. This is extremely rare.
+        unsigned Src, Dst, SrcSR, DstSR;
+        if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+            !MI.findRegisterUseOperand(Src)->isUndef()) {
+          ++NumDCE;
+          DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+          SmallVector<unsigned, 2> KillRegs;
+          InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+          if (MO.isDead() && !KillRegs.empty()) {
+            // Source register or an implicit super/sub-register use is killed.
+            assert(KillRegs[0] == Dst ||
+                   TRI->isSubRegister(KillRegs[0], Dst) ||
+                   TRI->isSuperRegister(KillRegs[0], Dst));
+            // Last def is now dead.
+            TransferDeadness(Src, RegKills, KillOps);
           }
+          VRM->RemoveMachineInstrFromMaps(&MI);
+          MBB->erase(&MI);
+          Erased = true;
+          Spills.disallowClobberPhysReg(VirtReg);
+          goto ProcessNextInst;
         }
 
-        // If the spill slot value is available, and this is a new definition of
-        // the value, the value is not available anymore.
-        if (MR & VirtRegMap::isMod) {
-          // Notice that the value in this stack slot has been modified.
-          Spills.ModifyStackSlotOrReMat(SS);
-          
-          // If this is *just* a mod of the value, check to see if this is just a
-          // store to the spill slot (i.e. the spill got merged into the copy). If
-          // so, realize that the vreg is available now, and add the store to the
-          // MaybeDeadStore info.
-          int StackSlot;
-          if (!(MR & VirtRegMap::isRef)) {
-            if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
-              assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-                     "Src hasn't been allocated yet?");
-
-              if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
-                                      Spills, RegKills, KillOps, TRI, VRM)) {
-                NextMII = llvm::next(MII);
-                BackTracked = true;
-                goto ProcessNextInst;
-              }
+        // If it's not a no-op copy, it clobbers the value in the destreg.
+        Spills.ClobberPhysReg(VirtReg);
+        ReusedOperands.markClobbered(VirtReg);
+
+        // Check to see if this instruction is a load from a stack slot into
+        // a register.  If so, this provides the stack slot value in the reg.
+        int FrameIdx;
+        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+          assert(DestReg == VirtReg && "Unknown load situation!");
+
+          // If it is a folded reference, then it's not safe to clobber.
+          bool Folded = FoldedSS.count(FrameIdx);
+          // Otherwise, if it wasn't available, remember that it is now!
+          Spills.addAvailable(FrameIdx, DestReg, !Folded);
+          goto ProcessNextInst;
+        }
 
-              // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
-              // this as a potentially dead store in case there is a subsequent
-              // store into the stack slot without a read from it.
-              MaybeDeadStores[StackSlot] = &MI;
+        continue;
+      }
 
-              // If the stack slot value was previously available in some other
-              // register, change it now.  Otherwise, make the register
-              // available in PhysReg.
-              Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
-            }
-          }
+      unsigned SubIdx = MO.getSubReg();
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      if (DoReMat)
+        ReMatDefs.insert(&MI);
+
+      // The only vregs left are stack slot definitions.
+      int StackSlot = VRM->getStackSlot(VirtReg);
+      const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+      // If this def is part of a two-address operand, make sure to execute
+      // the store from the correct physical register.
+      unsigned PhysReg;
+      unsigned TiedOp;
+      if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+        PhysReg = MI.getOperand(TiedOp).getReg();
+        if (SubIdx) {
+          unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+          assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+                 "Can't find corresponding super-register!");
+          PhysReg = SuperReg;
+        }
+      } else {
+        PhysReg = VRM->getPhys(VirtReg);
+        if (ReusedOperands.isClobbered(PhysReg)) {
+          // Another def has taken the assigned physreg. It must have been a
+          // use&def which got it due to reuse. Undo the reuse!
+          PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                      Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
         }
       }
 
-      // Process all of the spilled defs.
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = MI.getOperand(i);
-        if (!(MO.isReg() && MO.getReg() && MO.isDef()))
-          continue;
+      assert(PhysReg && "VR not assigned a physical register?");
+      MRI->setPhysRegUsed(PhysReg);
+      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      ReusedOperands.markClobbered(RReg);
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+
+      if (!MO.isDead()) {
+        MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+        SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+          LastStore, Spills, ReMatDefs, RegKills, KillOps);
+        NextMII = llvm::next(MII);
 
-        unsigned VirtReg = MO.getReg();
-        if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
-          // Check to see if this is a noop copy.  If so, eliminate the
-          // instruction before considering the dest reg to be changed.
-          // Also check if it's copying from an "undef", if so, we can't
-          // eliminate this or else the undef marker is lost and it will
-          // confuses the scavenger. This is extremely rare.
+        // Check to see if this is a noop copy.  If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        {
           unsigned Src, Dst, SrcSR, DstSR;
-          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
-              !MI.findRegisterUseOperand(Src)->isUndef()) {
+          if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
             ++NumDCE;
             DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-            SmallVector<unsigned, 2> KillRegs;
-            InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
-            if (MO.isDead() && !KillRegs.empty()) {
-              // Source register or an implicit super/sub-register use is killed.
-              assert(KillRegs[0] == Dst ||
-                     TRI->isSubRegister(KillRegs[0], Dst) ||
-                     TRI->isSuperRegister(KillRegs[0], Dst));
-              // Last def is now dead.
-              TransferDeadness(&MBB, Dist, Src, RegKills, KillOps, VRM);
-            }
-            VRM.RemoveMachineInstrFromMaps(&MI);
-            MBB.erase(&MI);
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            VRM->RemoveMachineInstrFromMaps(&MI);
+            MBB->erase(&MI);
             Erased = true;
-            Spills.disallowClobberPhysReg(VirtReg);
+            UpdateKills(*LastStore, TRI, RegKills, KillOps);
             goto ProcessNextInst;
           }
-
-          // If it's not a no-op copy, it clobbers the value in the destreg.
-          Spills.ClobberPhysReg(VirtReg);
-          ReusedOperands.markClobbered(VirtReg);
-   
-          // Check to see if this instruction is a load from a stack slot into
-          // a register.  If so, this provides the stack slot value in the reg.
-          int FrameIdx;
-          if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
-            assert(DestReg == VirtReg && "Unknown load situation!");
-
-            // If it is a folded reference, then it's not safe to clobber.
-            bool Folded = FoldedSS.count(FrameIdx);
-            // Otherwise, if it wasn't available, remember that it is now!
-            Spills.addAvailable(FrameIdx, DestReg, !Folded);
-            goto ProcessNextInst;
-          }
-              
-          continue;
-        }
-
-        unsigned SubIdx = MO.getSubReg();
-        bool DoReMat = VRM.isReMaterialized(VirtReg);
-        if (DoReMat)
-          ReMatDefs.insert(&MI);
-
-        // The only vregs left are stack slot definitions.
-        int StackSlot = VRM.getStackSlot(VirtReg);
-        const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
-
-        // If this def is part of a two-address operand, make sure to execute
-        // the store from the correct physical register.
-        unsigned PhysReg;
-        unsigned TiedOp;
-        if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
-          PhysReg = MI.getOperand(TiedOp).getReg();
-          if (SubIdx) {
-            unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
-            assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
-                   "Can't find corresponding super-register!");
-            PhysReg = SuperReg;
-          }
-        } else {
-          PhysReg = VRM.getPhys(VirtReg);
-          if (ReusedOperands.isClobbered(PhysReg)) {
-            // Another def has taken the assigned physreg. It must have been a
-            // use&def which got it due to reuse. Undo the reuse!
-            PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, 
-                               Spills, MaybeDeadStores, RegKills, KillOps, VRM);
-          }
         }
-
-        assert(PhysReg && "VR not assigned a physical register?");
-        RegInfo->setPhysRegUsed(PhysReg);
-        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
-        ReusedOperands.markClobbered(RReg);
-        MI.getOperand(i).setReg(RReg);
-        MI.getOperand(i).setSubReg(0);
-
-        if (!MO.isDead()) {
-          MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
-          SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
-                            LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
-          NextMII = llvm::next(MII);
-
-          // Check to see if this is a noop copy.  If so, eliminate the
-          // instruction before considering the dest reg to be changed.
-          {
-            unsigned Src, Dst, SrcSR, DstSR;
-            if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
-              ++NumDCE;
-              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
-              InvalidateKills(MI, TRI, RegKills, KillOps);
-              VRM.RemoveMachineInstrFromMaps(&MI);
-              MBB.erase(&MI);
-              Erased = true;
-              UpdateKills(*LastStore, TRI, RegKills, KillOps);
-              goto ProcessNextInst;
-            }
-          }
-        }    
       }
+    }
     ProcessNextInst:
-      // Delete dead instructions without side effects.
-      if (!Erased && !BackTracked && isSafeToDelete(MI)) {
-        InvalidateKills(MI, TRI, RegKills, KillOps);
-        VRM.RemoveMachineInstrFromMaps(&MI);
-        MBB.erase(&MI);
-        Erased = true;
-      }
-      if (!Erased)
-        DistanceMap.insert(std::make_pair(&MI, Dist++));
-      if (!Erased && !BackTracked) {
-        for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
-          UpdateKills(*II, TRI, RegKills, KillOps);
-      }
-      MII = NextMII;
+    // Delete dead instructions without side effects.
+    if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+      InvalidateKills(MI, TRI, RegKills, KillOps);
+      VRM->RemoveMachineInstrFromMaps(&MI);
+      MBB->erase(&MI);
+      Erased = true;
     }
-
+    if (!Erased)
+      DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+    if (!Erased && !BackTracked) {
+      for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+        UpdateKills(*II, TRI, RegKills, KillOps);
+    }
+    MII = NextMII;
   }
 
-};
-
 }
 
 llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 565509c..68471bd 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -165,7 +165,7 @@ void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
 
 void JITDebugRegisterer::UnregisterFunctionInternal(
     RegisteredFunctionsMap::iterator I) {
-  jit_code_entry *JITCodeEntry = I->second.second;
+  jit_code_entry *&JITCodeEntry = I->second.second;
 
   // Acquire the lock and do the unregistration.
   {
@@ -190,6 +190,9 @@ void JITDebugRegisterer::UnregisterFunctionInternal(
     __jit_debug_register_code();
   }
 
+  delete JITCodeEntry;
+  JITCodeEntry = NULL;
+
   // Free the ELF file in memory.
   std::string &Buffer = I->second.first;
   Buffer.clear();
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index da5435a..2f42e6b 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetFrameInfo.h"
@@ -67,33 +68,29 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
   unsigned PointerSize = TD->getPointerSize();
   int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
           PointerSize : -PointerSize;
-  bool IsLocal = false;
-  unsigned BaseLabelID = 0;
+  MCSymbol *BaseLabel = 0;
 
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
     const MachineMove &Move = Moves[i];
-    unsigned LabelID = Move.getLabelID();
+    MCSymbol *Label = Move.getLabel();
     
-    if (LabelID) {
-      // Throw out move if the label is invalid.
-      if (MMI->isLabelDeleted(LabelID))
-        continue;
-    }
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined())
+      continue;
     
     intptr_t LabelPtr = 0;
-    if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID);
+    if (Label) LabelPtr = JCE->getLabelAddress(Label);
 
     const MachineLocation &Dst = Move.getDestination();
     const MachineLocation &Src = Move.getSource();
     
     // Advance row if new location.
-    if (BaseLabelPtr && LabelID && (BaseLabelID != LabelID || !IsLocal)) {
+    if (BaseLabelPtr && Label && BaseLabel != Label) {
       JCE->emitByte(dwarf::DW_CFA_advance_loc4);
       JCE->emitInt32(LabelPtr - BaseLabelPtr);
       
-      BaseLabelID = LabelID; 
+      BaseLabel = Label; 
       BaseLabelPtr = LabelPtr;
-      IsLocal = true;
     }
     
     // If advancing cfa.
@@ -169,13 +166,6 @@ static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
 
 namespace {
 
-struct KeyInfo {
-  static inline unsigned getEmptyKey() { return -1U; }
-  static inline unsigned getTombstoneKey() { return -2U; }
-  static unsigned getHashValue(const unsigned &Key) { return Key; }
-  static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
-};
-
 /// ActionEntry - Structure describing an entry in the actions table.
 struct ActionEntry {
   int ValueForTypeID; // The value to write - may not be equal to the type id.
@@ -191,13 +181,13 @@ struct PadRange {
   unsigned RangeIndex;
 };
 
-typedef DenseMap<unsigned, PadRange, KeyInfo> RangeMapType;
+typedef DenseMap<MCSymbol*, PadRange> RangeMapType;
 
 /// CallSiteEntry - Structure describing an entry in the call-site table.
 struct CallSiteEntry {
-  unsigned BeginLabel; // zero indicates the start of the function.
-  unsigned EndLabel;   // zero indicates the end of the function.
-  unsigned PadLabel;   // zero indicates that there is no landing pad.
+  MCSymbol *BeginLabel; // zero indicates the start of the function.
+  MCSymbol *EndLabel;   // zero indicates the end of the function.
+  MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
   unsigned Action;
 };
 
@@ -308,7 +298,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
   for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
     const LandingPadInfo *LandingPad = LandingPads[i];
     for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
-      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
       assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
       PadRange P = { i, j };
       PadMap[BeginLabel] = P;
@@ -316,7 +306,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
   }
 
   bool MayThrow = false;
-  unsigned LastLabel = 0;
+  MCSymbol *LastLabel = 0;
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
         I != E; ++I) {
     for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
@@ -326,7 +316,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
         continue;
       }
 
-      unsigned BeginLabel = MI->getOperand(0).getImm();
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
       assert(BeginLabel && "Invalid label!");
 
       if (BeginLabel == LastLabel)
@@ -718,22 +708,20 @@ JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
 
   for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
     const MachineMove &Move = Moves[i];
-    unsigned LabelID = Move.getLabelID();
+    MCSymbol *Label = Move.getLabel();
     
-    if (LabelID) {
-      // Throw out move if the label is invalid.
-      if (MMI->isLabelDeleted(LabelID))
-        continue;
-    }
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined())
+      continue;
     
     intptr_t LabelPtr = 0;
-    if (LabelID) LabelPtr = JCE->getLabelAddress(LabelID);
+    if (Label) LabelPtr = JCE->getLabelAddress(Label);
 
     const MachineLocation &Dst = Move.getDestination();
     const MachineLocation &Src = Move.getSource();
     
     // Advance row if new location.
-    if (BaseLabelPtr && LabelID && (BaseLabelPtr != LabelPtr || !IsLocal)) {
+    if (BaseLabelPtr && Label && (BaseLabelPtr != LabelPtr || !IsLocal)) {
       FinalSize++;
       FinalSize += PointerSize;
       BaseLabelPtr = LabelPtr;
@@ -891,7 +879,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
   for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
     const LandingPadInfo *LandingPad = LandingPads[i];
     for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
-      unsigned BeginLabel = LandingPad->BeginLabels[j];
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
       assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
       PadRange P = { i, j };
       PadMap[BeginLabel] = P;
@@ -899,7 +887,7 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
   }
 
   bool MayThrow = false;
-  unsigned LastLabel = 0;
+  MCSymbol *LastLabel = 0;
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
         I != E; ++I) {
     for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
@@ -909,9 +897,8 @@ JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
         continue;
       }
 
-      unsigned BeginLabel = MI->getOperand(0).getImm();
-      assert(BeginLabel && "Invalid label!");
-
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+      
       if (BeginLabel == LastLabel)
         MayThrow = false;
 
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 783ebb4..83acb5d 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -341,7 +341,7 @@ namespace {
 
     /// LabelLocations - This vector is a mapping from Label ID's to their
     /// address.
-    std::vector<uintptr_t> LabelLocations;
+    DenseMap<MCSymbol*, uintptr_t> LabelLocations;
 
     /// MMI - Machine module info for exception informations
     MachineModuleInfo* MMI;
@@ -459,16 +459,13 @@ namespace {
 
     virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
 
-    virtual void emitLabel(uint64_t LabelID) {
-      if (LabelLocations.size() <= LabelID)
-        LabelLocations.resize((LabelID+1)*2);
-      LabelLocations[LabelID] = getCurrentPCValue();
+    virtual void emitLabel(MCSymbol *Label) {
+      LabelLocations[Label] = getCurrentPCValue();
     }
 
-    virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
-      assert(LabelLocations.size() > (unsigned)LabelID &&
-             LabelLocations[LabelID] && "Label not emitted!");
-      return LabelLocations[LabelID];
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+      assert(LabelLocations.count(Label) && "Label not emitted!");
+      return LabelLocations.find(Label)->second;
     }
 
     virtual void setModuleInfo(MachineModuleInfo* Info) {
@@ -1393,6 +1390,8 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
 void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
   if (TheJIT->getJITInfo().hasCustomJumpTables())
     return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
+    return;
 
   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   if (JT.empty()) return;
@@ -1420,6 +1419,8 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
 
   
   switch (MJTI->getEntryKind()) {
+  case MachineJumpTableInfo::EK_Inline:
+    return;
   case MachineJumpTableInfo::EK_BlockAddress: {
     // EK_BlockAddress - Each entry is a plain address of block, e.g.:
     //     .word LBB123
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index bda700b..c96ff82 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -69,6 +69,7 @@ MCAsmInfo::MCAsmInfo() {
   DwarfRequiresFrameSection = true;
   DwarfUsesInlineInfoSection = false;
   DwarfSectionOffsetDirective = 0;
+  HasMicrosoftFastStdCallMangling = false;
 
   AsmTransCBE = 0;
 }
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 9130493..5170206 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -35,4 +35,5 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
   AbsoluteEHSectionOffsets = false;
   SupportsDebugInformation = true;
   DwarfSectionOffsetDirective = "\t.secrel32\t";
+  HasMicrosoftFastStdCallMangling = true;
 }
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 66a0a24..7f39471 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -41,11 +41,10 @@ class MCAsmStreamer : public MCStreamer {
 
 public:
   MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
-                const MCAsmInfo &mai,
                 bool isLittleEndian, bool isVerboseAsm, MCInstPrinter *printer,
                 MCCodeEmitter *emitter, bool showInst)
-    : MCStreamer(Context), OS(os), MAI(mai), InstPrinter(printer),
-      Emitter(emitter), CommentStream(CommentToEmit),
+    : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
+      InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
       IsLittleEndian(isLittleEndian), IsVerboseAsm(isVerboseAsm),
       ShowInst(showInst) {
     if (InstPrinter && IsVerboseAsm)
@@ -654,9 +653,9 @@ void MCAsmStreamer::Finish() {
 
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
-                                    const MCAsmInfo &MAI, bool isLittleEndian,
+                                    bool isLittleEndian,
                                     bool isVerboseAsm, MCInstPrinter *IP,
                                     MCCodeEmitter *CE, bool ShowInst) {
-  return new MCAsmStreamer(Context, OS, MAI, isLittleEndian, isVerboseAsm,
+  return new MCAsmStreamer(Context, OS, isLittleEndian, isVerboseAsm,
                            IP, CE, ShowInst);
 }
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 00b02e0..4cf8b7e 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -9,6 +9,7 @@
 
 #define DEBUG_TYPE "assembler"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
@@ -23,6 +24,8 @@
 #include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
 
 // FIXME: Gross.
 #include "../Target/X86/X86FixupKinds.h"
@@ -49,8 +52,7 @@ static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW);
 static bool isVirtualSection(const MCSection &Section) {
   // FIXME: Lame.
   const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
-  unsigned Type = SMO.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
-  return (Type == MCSectionMachO::S_ZEROFILL);
+  return (SMO.getType() == MCSectionMachO::S_ZEROFILL);
 }
 
 static unsigned getFixupKindLog2Size(unsigned Kind) {
@@ -84,14 +86,19 @@ class MachObjectWriter {
     Header_Magic64 = 0xFEEDFACF
   };
 
-  static const unsigned Header32Size = 28;
-  static const unsigned Header64Size = 32;
-  static const unsigned SegmentLoadCommand32Size = 56;
-  static const unsigned Section32Size = 68;
-  static const unsigned SymtabLoadCommandSize = 24;
-  static const unsigned DysymtabLoadCommandSize = 80;
-  static const unsigned Nlist32Size = 12;
-  static const unsigned RelocationInfoSize = 8;
+  enum {
+    Header32Size = 28,
+    Header64Size = 32,
+    SegmentLoadCommand32Size = 56,
+    SegmentLoadCommand64Size = 72,
+    Section32Size = 68,
+    Section64Size = 80,
+    SymtabLoadCommandSize = 24,
+    DysymtabLoadCommandSize = 80,
+    Nlist32Size = 12,
+    Nlist64Size = 16,
+    RelocationInfoSize = 8
+  };
 
   enum HeaderFileType {
     HFT_Object = 0x1
@@ -104,7 +111,8 @@ class MachObjectWriter {
   enum LoadCommandType {
     LCT_Segment = 0x1,
     LCT_Symtab = 0x2,
-    LCT_Dysymtab = 0xb
+    LCT_Dysymtab = 0xb,
+    LCT_Segment64 = 0x19
   };
 
   // See <mach-o/nlist.h>.
@@ -159,11 +167,12 @@ class MachObjectWriter {
   };
 
   raw_ostream &OS;
-  bool IsLSB;
+  unsigned Is64Bit : 1;
+  unsigned IsLSB : 1;
 
 public:
-  MachObjectWriter(raw_ostream &_OS, bool _IsLSB = true)
-    : OS(_OS), IsLSB(_IsLSB) {
+  MachObjectWriter(raw_ostream &_OS, bool _Is64Bit, bool _IsLSB = true)
+    : OS(_OS), Is64Bit(_Is64Bit), IsLSB(_IsLSB) {
   }
 
   /// @name Helper Methods
@@ -220,22 +229,23 @@ public:
 
   /// @}
 
-  void WriteHeader32(unsigned NumLoadCommands, unsigned LoadCommandsSize,
-                     bool SubsectionsViaSymbols) {
+  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+                   bool SubsectionsViaSymbols) {
     uint32_t Flags = 0;
 
     if (SubsectionsViaSymbols)
       Flags |= HF_SubsectionsViaSymbols;
 
-    // struct mach_header (28 bytes)
+    // struct mach_header (28 bytes) or
+    // struct mach_header_64 (32 bytes)
 
     uint64_t Start = OS.tell();
     (void) Start;
 
-    Write32(Header_Magic32);
+    Write32(Is64Bit ? Header_Magic64 : Header_Magic32);
 
     // FIXME: Support cputype.
-    Write32(MachO::CPUTypeI386);
+    Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386);
     // FIXME: Support cpusubtype.
     Write32(MachO::CPUSubType_I386_ALL);
     Write32(HFT_Object);
@@ -243,48 +253,62 @@ public:
                                  // segment.
     Write32(LoadCommandsSize);
     Write32(Flags);
+    if (Is64Bit)
+      Write32(0); // reserved
 
-    assert(OS.tell() - Start == Header32Size);
+    assert(OS.tell() - Start == (Is64Bit ? Header64Size : Header32Size));
   }
 
-  /// WriteSegmentLoadCommand32 - Write a 32-bit segment load command.
+  /// WriteSegmentLoadCommand - Write a segment load command.
   ///
   /// \arg NumSections - The number of sections in this segment.
   /// \arg SectionDataSize - The total size of the sections.
-  void WriteSegmentLoadCommand32(unsigned NumSections,
-                                 uint64_t VMSize,
-                                 uint64_t SectionDataStartOffset,
-                                 uint64_t SectionDataSize) {
-    // struct segment_command (56 bytes)
+  void WriteSegmentLoadCommand(unsigned NumSections,
+                               uint64_t VMSize,
+                               uint64_t SectionDataStartOffset,
+                               uint64_t SectionDataSize) {
+    // struct segment_command (56 bytes) or
+    // struct segment_command_64 (72 bytes)
 
     uint64_t Start = OS.tell();
     (void) Start;
 
-    Write32(LCT_Segment);
-    Write32(SegmentLoadCommand32Size + NumSections * Section32Size);
+    unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size :
+      SegmentLoadCommand32Size;
+    Write32(Is64Bit ? LCT_Segment64 : LCT_Segment);
+    Write32(SegmentLoadCommandSize +
+            NumSections * (Is64Bit ? Section64Size : Section32Size));
 
     WriteString("", 16);
-    Write32(0); // vmaddr
-    Write32(VMSize); // vmsize
-    Write32(SectionDataStartOffset); // file offset
-    Write32(SectionDataSize); // file size
+    if (Is64Bit) {
+      Write64(0); // vmaddr
+      Write64(VMSize); // vmsize
+      Write64(SectionDataStartOffset); // file offset
+      Write64(SectionDataSize); // file size
+    } else {
+      Write32(0); // vmaddr
+      Write32(VMSize); // vmsize
+      Write32(SectionDataStartOffset); // file offset
+      Write32(SectionDataSize); // file size
+    }
     Write32(0x7); // maxprot
     Write32(0x7); // initprot
     Write32(NumSections);
     Write32(0); // flags
 
-    assert(OS.tell() - Start == SegmentLoadCommand32Size);
+    assert(OS.tell() - Start == SegmentLoadCommandSize);
   }
 
-  void WriteSection32(const MCSectionData &SD, uint64_t FileOffset,
-                      uint64_t RelocationsStart, unsigned NumRelocations) {
+  void WriteSection(const MCSectionData &SD, uint64_t FileOffset,
+                    uint64_t RelocationsStart, unsigned NumRelocations) {
     // The offset is unused for virtual sections.
     if (isVirtualSection(SD.getSection())) {
       assert(SD.getFileSize() == 0 && "Invalid file size!");
       FileOffset = 0;
     }
 
-    // struct section (68 bytes)
+    // struct section (68 bytes) or
+    // struct section_64 (80 bytes)
 
     uint64_t Start = OS.tell();
     (void) Start;
@@ -294,8 +318,13 @@ public:
       static_cast<const MCSectionMachO&>(SD.getSection());
     WriteString(Section.getSectionName(), 16);
     WriteString(Section.getSegmentName(), 16);
-    Write32(SD.getAddress()); // address
-    Write32(SD.getSize()); // size
+    if (Is64Bit) {
+      Write64(SD.getAddress()); // address
+      Write64(SD.getSize()); // size
+    } else {
+      Write32(SD.getAddress()); // address
+      Write32(SD.getSize()); // size
+    }
     Write32(FileOffset);
 
     unsigned Flags = Section.getTypeAndAttributes();
@@ -309,8 +338,10 @@ public:
     Write32(Flags);
     Write32(0); // reserved1
     Write32(Section.getStubSize()); // reserved2
+    if (Is64Bit)
+      Write32(0); // reserved3
 
-    assert(OS.tell() - Start == Section32Size);
+    assert(OS.tell() - Start == (Is64Bit ? Section64Size : Section32Size));
   }
 
   void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
@@ -368,7 +399,7 @@ public:
     assert(OS.tell() - Start == DysymtabLoadCommandSize);
   }
 
-  void WriteNlist32(MachSymbolData &MSD) {
+  void WriteNlist(MachSymbolData &MSD) {
     MCSymbolData &Data = *MSD.SymbolData;
     const MCSymbol &Symbol = Data.getSymbol();
     uint8_t Type = 0;
@@ -399,7 +430,7 @@ public:
       if (Symbol.isAbsolute()) {
         llvm_unreachable("FIXME: Not yet implemented!");
       } else {
-        Address = Data.getFragment()->getAddress() + Data.getOffset();
+        Address = Data.getAddress();
       }
     } else if (Data.isCommon()) {
       // Common symbols are encoded with the size in the address
@@ -427,7 +458,10 @@ public:
     // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
     // value.
     Write16(Flags);
-    Write32(Address);
+    if (Is64Bit)
+      Write64(Address);
+    else
+      Write32(Address);
   }
 
   struct MachRelocationEntry {
@@ -437,7 +471,6 @@ public:
   void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment,
                                       MCAsmFixup &Fixup,
                                       const MCValue &Target,
-                             DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
                                      std::vector<MachRelocationEntry> &Relocs) {
     uint32_t Address = Fragment.getOffset() + Fixup.Offset;
     unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
@@ -446,17 +479,17 @@ public:
 
     // See <reloc.h>.
     const MCSymbol *A = Target.getSymA();
-    MCSymbolData *A_SD = SymbolMap.lookup(A);
+    MCSymbolData *A_SD = &Asm.getSymbolData(*A);
 
     if (!A_SD->getFragment())
       llvm_report_error("symbol '" + A->getName() +
                         "' can not be undefined in a subtraction expression");
 
-    uint32_t Value = A_SD->getFragment()->getAddress() + A_SD->getOffset();
+    uint32_t Value = A_SD->getAddress();
     uint32_t Value2 = 0;
 
     if (const MCSymbol *B = Target.getSymB()) {
-      MCSymbolData *B_SD = SymbolMap.lookup(B);
+      MCSymbolData *B_SD = &Asm.getSymbolData(*B);
 
       if (!B_SD->getFragment())
         llvm_report_error("symbol '" + B->getName() +
@@ -468,22 +501,9 @@ public:
       // relocation types from the linkers point of view, this is done solely
       // for pedantic compatibility with 'as'.
       Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
-      Value2 = B_SD->getFragment()->getAddress() + B_SD->getOffset();
+      Value2 = B_SD->getAddress();
     }
 
-    // The value which goes in the fixup is current value of the expression.
-    Fixup.FixedValue = Value - Value2 + Target.getConstant();
-    if (IsPCRel)
-      Fixup.FixedValue -= Address;
-
-    // If this fixup is a vanilla PC relative relocation for a local label, we
-    // don't need a relocation.
-    //
-    // FIXME: Implement proper atom support.
-    if (IsPCRel && Target.getSymA() && Target.getSymA()->isTemporary() &&
-        !Target.getSymB())
-      return;
-
     MachRelocationEntry MRE;
     MRE.Word0 = ((Address   <<  0) |
                  (Type      << 24) |
@@ -507,14 +527,17 @@ public:
 
   void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment,
                              MCAsmFixup &Fixup,
-                             DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap,
                              std::vector<MachRelocationEntry> &Relocs) {
     unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
     unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
 
+    // FIXME: Share layout object.
+    MCAsmLayout Layout(Asm);
+
+    // Evaluate the fixup; if the value was resolved, no relocation is needed.
     MCValue Target;
-    if (!Fixup.Value->EvaluateAsRelocatable(Target))
-      llvm_report_error("expected relocatable expression");
+    if (Asm.EvaluateFixup(Layout, Fixup, &Fragment, Target, Fixup.FixedValue))
+      return;
 
     // If this is a difference or a defined symbol plus an offset, then we need
     // a scattered relocation entry.
@@ -525,7 +548,7 @@ public:
         (Target.getSymA() && !Target.getSymA()->isUndefined() &&
          Offset))
       return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target,
-                                            SymbolMap, Relocs);
+                                            Relocs);
 
     // See <reloc.h>.
     uint32_t Address = Fragment.getOffset() + Fixup.Offset;
@@ -537,13 +560,13 @@ public:
     if (Target.isAbsolute()) { // constant
       // SymbolNum of 0 indicates the absolute section.
       //
-      // FIXME: When is this generated?
+      // FIXME: Currently, these are never generated (see code below). I cannot
+      // find a case where they are actually emitted.
       Type = RIT_Vanilla;
       Value = 0;
-      llvm_unreachable("FIXME: Not yet implemented!");
     } else {
       const MCSymbol *Symbol = Target.getSymA();
-      MCSymbolData *SD = SymbolMap.lookup(Symbol);
+      MCSymbolData *SD = &Asm.getSymbolData(*Symbol);
 
       if (Symbol->isUndefined()) {
         IsExtern = 1;
@@ -559,24 +582,12 @@ public:
           if (&*it == SD->getFragment()->getParent())
             break;
         assert(it != ie && "Unable to find section index!");
-        Value = SD->getFragment()->getAddress() + SD->getOffset();
+        Value = SD->getAddress();
       }
 
       Type = RIT_Vanilla;
     }
 
-    // The value which goes in the fixup is current value of the expression.
-    Fixup.FixedValue = Value + Target.getConstant();
-    if (IsPCRel)
-      Fixup.FixedValue -= Address;
-
-    // If this fixup is a vanilla PC relative relocation for a local label, we
-    // don't need a relocation.
-    //
-    // FIXME: Implement proper atom support.
-    if (IsPCRel && Target.getSymA() && Target.getSymA()->isTemporary())
-      return;
-
     // struct relocation_info (8 bytes)
     MachRelocationEntry MRE;
     MRE.Word0 = Address;
@@ -588,8 +599,7 @@ public:
     Relocs.push_back(MRE);
   }
 
-  void BindIndirectSymbols(MCAssembler &Asm,
-                           DenseMap<const MCSymbol*,MCSymbolData*> &SymbolMap) {
+  void BindIndirectSymbols(MCAssembler &Asm) {
     // This is the point where 'as' creates actual symbols for indirect symbols
     // (in the following two passes). It would be easier for us to do this
     // sooner when we see the attribute, but that makes getting the order in the
@@ -604,14 +614,10 @@ public:
       const MCSectionMachO &Section =
         static_cast<const MCSectionMachO&>(it->SectionData->getSection());
 
-      unsigned Type =
-        Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
-      if (Type != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
         continue;
 
-      MCSymbolData *&Entry = SymbolMap[it->Symbol];
-      if (!Entry)
-        Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm);
+      Asm.getOrCreateSymbolData(*it->Symbol);
     }
 
     // Then lazy symbol pointers and symbol stubs.
@@ -621,21 +627,17 @@ public:
       const MCSectionMachO &Section =
         static_cast<const MCSectionMachO&>(it->SectionData->getSection());
 
-      unsigned Type =
-        Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
-      if (Type != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
-          Type != MCSectionMachO::S_SYMBOL_STUBS)
+      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
         continue;
 
-      MCSymbolData *&Entry = SymbolMap[it->Symbol];
-      if (!Entry) {
-        Entry = new MCSymbolData(*it->Symbol, 0, 0, &Asm);
-
-        // Set the symbol type to undefined lazy, but only on construction.
-        //
-        // FIXME: Do not hardcode.
-        Entry->setFlags(Entry->getFlags() | 0x0001);
-      }
+      // Set the symbol type to undefined lazy, but only on construction.
+      //
+      // FIXME: Do not hardcode.
+      bool Created;
+      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+      if (Created)
+        Entry.setFlags(Entry.getFlags() | 0x0001);
     }
   }
 
@@ -672,7 +674,10 @@ public:
       const MCSymbol &Symbol = it->getSymbol();
 
       // Ignore assembler temporaries.
-      if (it->getSymbol().isTemporary())
+      if (it->getSymbol().isTemporary() &&
+          (!it->getFragment() ||
+           !Asm.getBackend().doesSectionRequireSymbols(
+             it->getFragment()->getParent()->getSection())))
         continue;
 
       if (!it->isExternal() && !Symbol.isUndefined())
@@ -708,7 +713,10 @@ public:
       const MCSymbol &Symbol = it->getSymbol();
 
       // Ignore assembler temporaries.
-      if (it->getSymbol().isTemporary())
+      if (it->getSymbol().isTemporary() &&
+          (!it->getFragment() ||
+           !Asm.getBackend().doesSectionRequireSymbols(
+             it->getFragment()->getParent()->getSection())))
         continue;
 
       if (it->isExternal() || Symbol.isUndefined())
@@ -756,16 +764,8 @@ public:
   void WriteObject(MCAssembler &Asm) {
     unsigned NumSections = Asm.size();
 
-    // Compute the symbol -> symbol data map.
-    //
-    // FIXME: This should not be here.
-    DenseMap<const MCSymbol*, MCSymbolData *> SymbolMap;
-    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
-           ie = Asm.symbol_end(); it != ie; ++it)
-      SymbolMap[&it->getSymbol()] = it;
-
     // Create symbol data for any indirect symbols.
-    BindIndirectSymbols(Asm, SymbolMap);
+    BindIndirectSymbols(Asm);
 
     // Compute symbol table information.
     SmallString<256> StringTable;
@@ -782,7 +782,8 @@ public:
     // The section data starts after the header, the segment load command (and
     // section headers) and the symbol table.
     unsigned NumLoadCommands = 1;
-    uint64_t LoadCommandsSize =
+    uint64_t LoadCommandsSize = Is64Bit ?
+      SegmentLoadCommand64Size + NumSections * Section64Size :
       SegmentLoadCommand32Size + NumSections * Section32Size;
 
     // Add the symbol table load command sizes, if used.
@@ -793,7 +794,8 @@ public:
 
     // Compute the total size of the section data, as well as its file size and
     // vm size.
-    uint64_t SectionDataStart = Header32Size + LoadCommandsSize;
+    uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size)
+      + LoadCommandsSize;
     uint64_t SectionDataSize = 0;
     uint64_t SectionDataFileSize = 0;
     uint64_t VMSize = 0;
@@ -819,10 +821,10 @@ public:
     SectionDataFileSize += SectionDataPadding;
 
     // Write the prolog, starting with the header and load command...
-    WriteHeader32(NumLoadCommands, LoadCommandsSize,
-                  Asm.getSubsectionsViaSymbols());
-    WriteSegmentLoadCommand32(NumSections, VMSize,
-                              SectionDataStart, SectionDataSize);
+    WriteHeader(NumLoadCommands, LoadCommandsSize,
+                Asm.getSubsectionsViaSymbols());
+    WriteSegmentLoadCommand(NumSections, VMSize,
+                            SectionDataStart, SectionDataSize);
 
     // ... and then the section headers.
     //
@@ -845,11 +847,11 @@ public:
         if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2))
           for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i)
             ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1],
-                                  SymbolMap, RelocInfos);
+                                  RelocInfos);
 
       unsigned NumRelocs = RelocInfos.size() - NumRelocsStart;
       uint64_t SectionStart = SectionDataStart + SD.getAddress();
-      WriteSection32(SD, SectionStart, RelocTableEnd, NumRelocs);
+      WriteSection(SD, SectionStart, RelocTableEnd, NumRelocs);
       RelocTableEnd += NumRelocs * RelocationInfoSize;
     }
 
@@ -876,7 +878,8 @@ public:
 
       // The string table is written after symbol table.
       uint64_t StringTableOffset =
-        SymbolTableOffset + NumSymTabSymbols * Nlist32Size;
+        SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size :
+                                                Nlist32Size);
       WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
                              StringTableOffset, StringTable.size());
 
@@ -909,12 +912,10 @@ public:
         // special handling.
         const MCSectionMachO &Section =
           static_cast<const MCSectionMachO&>(it->SectionData->getSection());
-        unsigned Type =
-          Section.getTypeAndAttributes() & MCSectionMachO::SECTION_TYPE;
-        if (Type == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
           // If this symbol is defined and internal, mark it as such.
           if (it->Symbol->isDefined() &&
-              !SymbolMap.lookup(it->Symbol)->isExternal()) {
+              !Asm.getSymbolData(*it->Symbol).isExternal()) {
             uint32_t Flags = ISF_Local;
             if (it->Symbol->isAbsolute())
               Flags |= ISF_Absolute;
@@ -923,18 +924,18 @@ public:
           }
         }
 
-        Write32(SymbolMap[it->Symbol]->getIndex());
+        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
       }
 
       // FIXME: Check that offsets match computed ones.
 
       // Write the symbol table entries.
       for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
-        WriteNlist32(LocalSymbolData[i]);
+        WriteNlist(LocalSymbolData[i]);
       for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
-        WriteNlist32(ExternalSymbolData[i]);
+        WriteNlist(ExternalSymbolData[i]);
       for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
-        WriteNlist32(UndefinedSymbolData[i]);
+        WriteNlist(UndefinedSymbolData[i]);
 
       // Write the string table.
       OS << StringTable.str();
@@ -1006,15 +1007,65 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
 
 /* *** */
 
-MCAssembler::MCAssembler(MCContext &_Context, raw_ostream &_OS)
-  : Context(_Context), OS(_OS), SubsectionsViaSymbols(false)
+MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
+                         raw_ostream &_OS)
+  : Context(_Context), Backend(_Backend), OS(_OS), SubsectionsViaSymbols(false)
 {
 }
 
 MCAssembler::~MCAssembler() {
 }
 
+bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
+                                MCDataFragment *DF,
+                                MCValue &Target, uint64_t &Value) const {
+  if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout))
+    llvm_report_error("expected relocatable expression");
+
+  // FIXME: How do non-scattered symbols work in ELF? I presume the linker
+  // doesn't support small relocations, but then under what criteria does the
+  // assembler allow symbol differences?
+
+  Value = Target.getConstant();
+
+  // FIXME: This "resolved" check isn't quite right. The assumption is that if
+  // we have a PCrel access to a temporary, then that temporary is in the same
+  // atom, and so the value is resolved. We need explicit atoms to implement
+  // this more precisely.
+  bool IsResolved = true, IsPCRel = isFixupKindPCRel(Fixup.Kind);
+  if (const MCSymbol *Symbol = Target.getSymA()) {
+    if (Symbol->isDefined())
+      Value += getSymbolData(*Symbol).getAddress();
+    else
+      IsResolved = false;
+
+    // With scattered symbols, we assume anything that isn't a PCrel temporary
+    // access can have an arbitrary value.
+    if (getBackend().hasScatteredSymbols() &&
+        (!IsPCRel || !Symbol->isTemporary()))
+      IsResolved = false;
+  }
+  if (const MCSymbol *Symbol = Target.getSymB()) {
+    if (Symbol->isDefined())
+      Value -= getSymbolData(*Symbol).getAddress();
+    else
+      IsResolved = false;
+
+    // With scattered symbols, we assume anything that isn't a PCrel temporary
+    // access can have an arbitrary value.
+    if (getBackend().hasScatteredSymbols() &&
+        (!IsPCRel || !Symbol->isTemporary()))
+      IsResolved = false;
+  }
+
+  if (IsPCRel)
+    Value -= DF->getAddress() + Fixup.Offset;
+
+  return IsResolved;
+}
+
 void MCAssembler::LayoutSection(MCSectionData &SD) {
+  MCAsmLayout Layout(*this);
   uint64_t Address = SD.getAddress();
 
   for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) {
@@ -1043,21 +1094,17 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
     case MCFragment::FT_Org: {
       MCOrgFragment &OF = cast<MCOrgFragment>(F);
 
-      MCValue Target;
-      if (!OF.getOffset().EvaluateAsRelocatable(Target))
-        llvm_report_error("expected relocatable expression");
-
-      if (!Target.isAbsolute())
-        llvm_unreachable("FIXME: Not yet implemented!");
-      uint64_t OrgOffset = Target.getConstant();
-      uint64_t Offset = Address - SD.getAddress();
+      int64_t TargetLocation;
+      if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
+        llvm_report_error("expected assembly-time absolute expression");
 
       // FIXME: We need a way to communicate this error.
-      if (OrgOffset < Offset)
-        llvm_report_error("invalid .org offset '" + Twine(OrgOffset) +
-                          "' (at offset '" + Twine(Offset) + "'");
+      int64_t Offset = TargetLocation - F.getOffset();
+      if (Offset < 0)
+        llvm_report_error("invalid .org offset '" + Twine(TargetLocation) +
+                          "' (at offset '" + Twine(F.getOffset()) + "')");
 
-      F.setFileSize(OrgOffset - Offset);
+      F.setFileSize(Offset);
       break;
     }
 
@@ -1263,6 +1310,43 @@ void MCAssembler::Finish() {
       llvm::errs() << "assembler backend - pre-layout\n--\n";
       dump(); });
 
+  // Layout until everything fits.
+  while (LayoutOnce())
+    continue;
+
+  DEBUG_WITH_TYPE("mc-dump", {
+      llvm::errs() << "assembler backend - post-layout\n--\n";
+      dump(); });
+
+  // Write the object file.
+  //
+  // FIXME: Factor out MCObjectWriter.
+  bool Is64Bit = StringRef(getBackend().getTarget().getName()) == "x86-64";
+  MachObjectWriter MOW(OS, Is64Bit);
+  MOW.WriteObject(*this);
+
+  OS.flush();
+}
+
+bool MCAssembler::FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF) {
+  // FIXME: Share layout object.
+  MCAsmLayout Layout(*this);
+
+  // Currently we only need to relax X86::reloc_pcrel_1byte.
+  if (unsigned(Fixup.Kind) != X86::reloc_pcrel_1byte)
+    return false;
+
+  // If we cannot resolve the fixup value, it requires relaxation.
+  MCValue Target;
+  uint64_t Value;
+  if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+    return true;
+
+  // Otherwise, relax if the value is too big for a (signed) i8.
+  return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+bool MCAssembler::LayoutOnce() {
   // Layout the concrete sections and fragments.
   uint64_t Address = 0;
   MCSectionData *Prev = 0;
@@ -1304,20 +1388,94 @@ void MCAssembler::Finish() {
     SD.setAddress(Address);
     LayoutSection(SD);
     Address += SD.getSize();
-
   }
 
-  DEBUG_WITH_TYPE("mc-dump", {
-      llvm::errs() << "assembler backend - post-layout\n--\n";
-      dump(); });
+  // Scan the fixups in order and relax any that don't fit.
+  for (iterator it = begin(), ie = end(); it != ie; ++it) {
+    MCSectionData &SD = *it;
 
-  // Write the object file.
-  MachObjectWriter MOW(OS);
-  MOW.WriteObject(*this);
+    for (MCSectionData::iterator it2 = SD.begin(),
+           ie2 = SD.end(); it2 != ie2; ++it2) {
+      MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
+      if (!DF)
+        continue;
 
-  OS.flush();
-}
+      for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+             ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+        MCAsmFixup &Fixup = *it3;
+
+        // Check whether we need to relax this fixup.
+        if (!FixupNeedsRelaxation(Fixup, DF))
+          continue;
+
+        // Relax the instruction.
+        //
+        // FIXME: This is a huge temporary hack which just looks for x86
+        // branches; the only thing we need to relax on x86 is
+        // 'X86::reloc_pcrel_1byte'. Once we have MCInst fragments, this will be
+        // replaced by a TargetAsmBackend hook (most likely tblgen'd) to relax
+        // an individual MCInst.
+        SmallVectorImpl<char> &C = DF->getContents();
+        uint64_t PrevOffset = Fixup.Offset;
+        unsigned Amt = 0;
+
+          // jcc instructions
+        if (unsigned(C[Fixup.Offset-1]) >= 0x70 &&
+            unsigned(C[Fixup.Offset-1]) <= 0x7f) {
+          C[Fixup.Offset] = C[Fixup.Offset-1] + 0x10;
+          C[Fixup.Offset-1] = char(0x0f);
+          ++Fixup.Offset;
+          Amt = 4;
+
+          // jmp rel8
+        } else if (C[Fixup.Offset-1] == char(0xeb)) {
+          C[Fixup.Offset-1] = char(0xe9);
+          Amt = 3;
+
+        } else
+          llvm_unreachable("unknown 1 byte pcrel instruction!");
+
+        Fixup.Value = MCBinaryExpr::Create(
+          MCBinaryExpr::Sub, Fixup.Value,
+          MCConstantExpr::Create(3, getContext()),
+          getContext());
+        C.insert(C.begin() + Fixup.Offset, Amt, char(0));
+        Fixup.Kind = MCFixupKind(X86::reloc_pcrel_4byte);
+
+        // Update the remaining fixups, which have slid.
+        //
+        // FIXME: This is bad for performance, but will be eliminated by the
+        // move to MCInst specific fragments.
+        ++it3;
+        for (; it3 != ie3; ++it3)
+          it3->Offset += Amt;
+
+        // Update all the symbols for this fragment, which may have slid.
+        //
+        // FIXME: This is really really bad for performance, but will be
+        // eliminated by the move to MCInst specific fragments.
+        for (MCAssembler::symbol_iterator it = symbol_begin(),
+               ie = symbol_end(); it != ie; ++it) {
+          MCSymbolData &SD = *it;
 
+          if (it->getFragment() != DF)
+            continue;
+
+          if (SD.getOffset() > PrevOffset)
+            SD.setOffset(SD.getOffset() + Amt);
+        }
+
+        // Restart layout.
+        //
+        // FIXME: This is O(N^2), but will be eliminated once we have a smart
+        // MCAsmLayout object.
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
 
 // Debugging methods
 
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 63264f6..70c89a2 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
 
-MCContext::MCContext() {
+MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
 }
 
 MCContext::~MCContext() {
@@ -23,30 +23,41 @@ MCContext::~MCContext() {
   // we don't need to free them here.
 }
 
-MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name, bool isTemporary) {
   assert(!Name.empty() && "Normal symbols cannot be unnamed!");
-  MCSymbol *&Entry = Symbols[Name];
-  if (Entry) return Entry;
+  
+  // Do the lookup and get the entire StringMapEntry.  We want access to the
+  // key if we are creating the entry.
+  StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
+  if (Entry.getValue()) return Entry.getValue();
 
-  return Entry = new (*this) MCSymbol(Name, false);
+  // Ok, the entry doesn't already exist.  Have the MCSymbol object itself refer
+  // to the copy of the string that is embedded in the StringMapEntry.
+  MCSymbol *Result = new (*this) MCSymbol(Entry.getKey(), isTemporary);
+  Entry.setValue(Result);
+  return Result; 
 }
 
-MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name, bool isTemporary) {
   SmallString<128> NameSV;
   Name.toVector(NameSV);
-  return GetOrCreateSymbol(NameSV.str());
+  return GetOrCreateSymbol(NameSV.str(), isTemporary);
+}
+
+MCSymbol *MCContext::CreateTempSymbol() {
+  return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+                                    "tmp" + Twine(NextUniqueID++));
 }
 
 
 MCSymbol *MCContext::GetOrCreateTemporarySymbol(StringRef Name) {
-  // If unnamed, just create a symbol.
+  // If there is no name, create a new anonymous symbol.  CreateTempSymbol()
+  // already owns the "<private prefix>tmp<N>" naming scheme, so defer to it.
+  // FIXME: Remove this.  This form of the method should always take a name.
   if (Name.empty())
-    new (*this) MCSymbol("", true);
-    
-  // Otherwise create as usual.
-  MCSymbol *&Entry = Symbols[Name];
-  if (Entry) return Entry;
-  return Entry = new (*this) MCSymbol(Name, true);
+    return CreateTempSymbol();
+
+  return GetOrCreateSymbol(Name, true);
 }
 
 MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) {
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 4439eba..a2ed20b 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -8,11 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
 
 void MCExpr::print(raw_ostream &OS) const {
@@ -24,7 +28,8 @@ void MCExpr::print(raw_ostream &OS) const {
     return;
 
   case MCExpr::SymbolRef: {
-    const MCSymbol &Sym = cast<MCSymbolRefExpr>(*this).getSymbol();
+    const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
+    const MCSymbol &Sym = SRE.getSymbol();
     
     // Parenthesize names that start with $ so that they don't look like
     // absolute names.
@@ -32,6 +37,10 @@ void MCExpr::print(raw_ostream &OS) const {
       OS << '(' << Sym << ')';
     else
       OS << Sym;
+
+    if (SRE.getKind() != MCSymbolRefExpr::VK_None)
+      OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
     return;
   }
 
@@ -124,28 +133,68 @@ const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
   return new (Ctx) MCConstantExpr(Value);
 }
 
+/* *** */
+
 const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+                                               VariantKind Kind,
                                                MCContext &Ctx) {
-  return new (Ctx) MCSymbolRefExpr(Sym);
+  return new (Ctx) MCSymbolRefExpr(Sym, Kind);
 }
 
-const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) {
-  return Create(Ctx.GetOrCreateSymbol(Name), Ctx);
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+                                               MCContext &Ctx) {
+  return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
 }
 
 const MCSymbolRefExpr *MCSymbolRefExpr::CreateTemp(StringRef Name,
+                                                   VariantKind Kind,
                                                    MCContext &Ctx) {
-  return Create(Ctx.GetOrCreateTemporarySymbol(Name), Ctx);
+  return Create(Ctx.GetOrCreateTemporarySymbol(Name), Kind, Ctx);
 }
 
+StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
+  // Textual name of each variant kind (what MCExpr::print emits after '@').
+  switch (Kind) {
+  case VK_None: return "<<none>>";
+  case VK_GOT: return "GOT";
+  case VK_GOTOFF: return "GOTOFF";
+  case VK_GOTPCREL: return "GOTPCREL";
+  case VK_GOTTPOFF: return "GOTTPOFF";
+  case VK_INDNTPOFF: return "INDNTPOFF";
+  case VK_NTPOFF: return "NTPOFF";
+  case VK_PLT: return "PLT";
+  case VK_TLSGD: return "TLSGD";
+  case VK_TPOFF: return "TPOFF";
+  case VK_Invalid:
+  default: return "<<invalid>>";
+  }
+}
+
+MCSymbolRefExpr::VariantKind
+MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
+  return StringSwitch<VariantKind>(Name) // Inverse of getVariantKindName.
+    .Case("GOT", VK_GOT)
+    .Case("GOTOFF", VK_GOTOFF)
+    .Case("GOTPCREL", VK_GOTPCREL)
+    .Case("GOTTPOFF", VK_GOTTPOFF)
+    .Case("INDNTPOFF", VK_INDNTPOFF)
+    .Case("NTPOFF", VK_NTPOFF)
+    .Case("PLT", VK_PLT)
+    .Case("TLSGD", VK_TLSGD)
+    .Case("TPOFF", VK_TPOFF)
+    .Default(VK_Invalid); // Any other spelling is reported as VK_Invalid.
+}
+
+/* *** */
+
 void MCTargetExpr::Anchor() {}
 
 /* *** */
 
-bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
   MCValue Value;
   
-  if (!EvaluateAsRelocatable(Value) || !Value.isAbsolute())
+  if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute())
     return false;
 
   Res = Value.getConstant();
@@ -174,10 +223,11 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A,
   return true;
 }
 
-bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const {
+bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
+                                   const MCAsmLayout *Layout) const {
   switch (getKind()) {
   case Target:
-    return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res);
+    return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
       
   case Constant:
     Res = MCValue::get(cast<MCConstantExpr>(this)->getValue());
@@ -187,8 +237,24 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const {
     const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol();
 
     // Evaluate recursively if this is a variable.
-    if (Sym.isVariable())
-      return Sym.getValue()->EvaluateAsRelocatable(Res);
+    if (Sym.isVariable()) {
+      if (!Sym.getValue()->EvaluateAsRelocatable(Res, Layout))
+        return false;
+
+      // Absolutize symbol differences between defined symbols when we have a
+      // layout object and the target requests it.
+      if (Layout && Res.getSymB() &&
+          Layout->getAssembler().getBackend().hasAbsolutizedSet() &&
+          Res.getSymA()->isDefined() && Res.getSymB()->isDefined()) {
+        MCSymbolData &A = Layout->getAssembler().getSymbolData(*Res.getSymA());
+        MCSymbolData &B = Layout->getAssembler().getSymbolData(*Res.getSymB());
+        Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset()
+                           - B.getFragment()->getAddress() - B.getOffset()
+                           + Res.getConstant());
+      }
+
+      return true;
+    }
 
     Res = MCValue::get(&Sym, 0, 0);
     return true;
@@ -198,7 +264,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const {
     const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
     MCValue Value;
 
-    if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value))
+    if (!AUE->getSubExpr()->EvaluateAsRelocatable(Value, Layout))
       return false;
 
     switch (AUE->getOpcode()) {
@@ -231,8 +297,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res) const {
     const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
     MCValue LHSValue, RHSValue;
     
-    if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue) ||
-        !ABE->getRHS()->EvaluateAsRelocatable(RHSValue))
+    if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) ||
+        !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout))
       return false;
 
     // We only support a few operations on non-constant expressions, handle
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index a7a8a5d..73b1074 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -16,7 +16,6 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -48,8 +47,6 @@ private:
   MCAssembler Assembler;
   MCCodeEmitter *Emitter;
   MCSectionData *CurSectionData;
-  DenseMap<const MCSection*, MCSectionData*> SectionMap;
-  DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
 
 private:
   MCFragment *getCurrentFragment() const {
@@ -61,27 +58,10 @@ private:
     return 0;
   }
 
-  MCSectionData &getSectionData(const MCSection &Section) {
-    MCSectionData *&Entry = SectionMap[&Section];
-
-    if (!Entry)
-      Entry = new MCSectionData(Section, &Assembler);
-
-    return *Entry;
-  }
-
-  MCSymbolData &getSymbolData(const MCSymbol &Symbol) {
-    MCSymbolData *&Entry = SymbolMap[&Symbol];
-
-    if (!Entry)
-      Entry = new MCSymbolData(Symbol, 0, 0, &Assembler);
-
-    return *Entry;
-  }
-
 public:
-  MCMachOStreamer(MCContext &Context, raw_ostream &_OS, MCCodeEmitter *_Emitter)
-    : MCStreamer(Context), Assembler(Context, _OS), Emitter(_Emitter),
+  MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                  raw_ostream &_OS, MCCodeEmitter *_Emitter)
+    : MCStreamer(Context), Assembler(Context, TAB, _OS), Emitter(_Emitter),
       CurSectionData(0) {}
   ~MCMachOStreamer() {}
 
@@ -99,7 +79,8 @@ public:
     }
 
     case MCExpr::SymbolRef:
-      getSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+      Assembler.getOrCreateSymbolData(
+        cast<MCSymbolRefExpr>(Value)->getSymbol());
       break;
 
     case MCExpr::Unary:
@@ -164,7 +145,7 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) {
   if (Section == CurSection) return;
 
   CurSection = Section;
-  CurSectionData = &getSectionData(*Section);
+  CurSectionData = &Assembler.getOrCreateSectionData(*Section);
 }
 
 void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
@@ -175,7 +156,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
   if (!F)
     F = new MCDataFragment(CurSectionData);
 
-  MCSymbolData &SD = getSymbolData(*Symbol);
+  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
   assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
   SD.setFragment(F);
   SD.setOffset(F->getContents().size());
@@ -203,7 +184,7 @@ void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
 
   // FIXME: Lift context changes into super class.
   // FIXME: Set associated section.
-  Symbol->setValue(Value);
+  Symbol->setValue(AddValueSymbols(Value));
 }
 
 void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -221,9 +202,9 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   }
 
   // Adding a symbol attribute always introduces the symbol, note that an
-  // important side effect of calling getSymbolData here is to register the
-  // symbol with the assembler.
-  MCSymbolData &SD = getSymbolData(*Symbol);
+  // important side effect of calling getOrCreateSymbolData here is to register
+  // the symbol with the assembler.
+  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
 
   // The implementation of symbol attributes is designed to match 'as', but it
   // leaves much to desired. It doesn't really make sense to arbitrarily add and
@@ -289,7 +270,7 @@ void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
   // Encode the 'desc' value into the lowest implementation defined bits.
   assert(DescValue == (DescValue & SF_DescFlagsMask) && 
          "Invalid .desc value!");
-  getSymbolData(*Symbol).setFlags(DescValue & SF_DescFlagsMask);
+  Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask);
 }
 
 void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -297,14 +278,14 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
   // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
-  MCSymbolData &SD = getSymbolData(*Symbol);
+  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
   SD.setExternal(true);
   SD.setCommon(Size, ByteAlignment);
 }
 
 void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
                                    unsigned Size, unsigned ByteAlignment) {
-  MCSectionData &SectData = getSectionData(*Section);
+  MCSectionData &SectData = Assembler.getOrCreateSectionData(*Section);
 
   // The symbol may not be present, which only creates the section.
   if (!Symbol)
@@ -314,7 +295,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
 
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
 
-  MCSymbolData &SD = getSymbolData(*Symbol);
+  MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
 
   MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData);
   SD.setFragment(F);
@@ -346,9 +327,8 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
     for (unsigned i = 0; i != Size; ++i)
       DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
   } else {
-    DF->getFixups().push_back(MCAsmFixup(DF->getContents().size(),
-                                         *AddValueSymbols(Value),
-                                         MCFixup::getKindForSize(Size)));
+    DF->addFixup(MCAsmFixup(DF->getContents().size(), *AddValueSymbols(Value),
+                            MCFixup::getKindForSize(Size)));
     DF->getContents().resize(DF->getContents().size() + Size, 0);
   }
 }
@@ -407,9 +387,8 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
     DF = new MCDataFragment(CurSectionData);
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
     MCFixup &F = Fixups[i];
-    DF->getFixups().push_back(MCAsmFixup(DF->getContents().size()+F.getOffset(),
-                                         *F.getValue(),
-                                         F.getKind()));
+    DF->addFixup(MCAsmFixup(DF->getContents().size()+F.getOffset(),
+                            *F.getValue(), F.getKind()));
   }
   DF->getContents().append(Code.begin(), Code.end());
 }
@@ -418,7 +397,7 @@ void MCMachOStreamer::Finish() {
   Assembler.Finish();
 }
 
-MCStreamer *llvm::createMachOStreamer(MCContext &Context, raw_ostream &OS,
-                                      MCCodeEmitter *CE) {
-  return new MCMachOStreamer(Context, OS, CE);
+MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                                      raw_ostream &OS, MCCodeEmitter *CE) {
+  return new MCMachOStreamer(Context, TAB, OS, CE);
 }
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 482eefd..22c8d76 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -140,8 +140,14 @@ AsmToken AsmLexer::LexDigit() {
     StringRef Result(TokStart, CurPtr - TokStart);
     
     long long Value;
-    if (Result.getAsInteger(10, Value))
-      return ReturnError(TokStart, "Invalid decimal number");
+    if (Result.getAsInteger(10, Value)) {
+      // We have to handle minint_as_a_positive_value specially, because
+      // - minint_as_a_positive_value = minint and it is valid.
+      if (Result == "9223372036854775808")
+        Value = -9223372036854775808ULL;
+      else
+        return ReturnError(TokStart, "Invalid decimal number");
+    }
     return AsmToken(AsmToken::Integer, Result, Value);
   }
   
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index fc8d549..4ec5247 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -20,7 +20,6 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/SourceMgr.h"
@@ -139,15 +138,14 @@ const AsmToken &AsmParser::Lex() {
   return *tok;
 }
 
-bool AsmParser::Run() {
-  // Create the initial section.
+bool AsmParser::Run(bool NoInitialTextSection) {
+  // Create the initial section, if requested.
   //
-  // FIXME: Support -n.
   // FIXME: Target hook & command line option for initial section.
-  Out.SwitchSection(getMachOSection("__TEXT", "__text",
-                                    MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
-                                    0, SectionKind::getText()));
-
+  if (!NoInitialTextSection)
+    Out.SwitchSection(getMachOSection("__TEXT", "__text",
+                                      MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                      0, SectionKind::getText()));
 
   // Prime the lexer.
   Lex();
@@ -264,19 +262,29 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   case AsmToken::String:
   case AsmToken::Identifier: {
     // This is a symbol reference.
-    MCSymbol *Sym = CreateSymbol(getTok().getIdentifier());
+    std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
+    MCSymbol *Sym = CreateSymbol(Split.first);
+
+    // Look up the symbol variant if an '@' suffix was present.
+    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+    if (Split.first.size() != getTok().getIdentifier().size())
+      Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+
     EndLoc = Lexer.getLoc();
     Lex(); // Eat identifier.
 
     // If this is an absolute variable reference, substitute it now to preserve
     // semantics in the face of reassignment.
     if (Sym->getValue() && isa<MCConstantExpr>(Sym->getValue())) {
+      if (Variant)
+        return Error(EndLoc, "unexpected modifier on variable reference");
+
       Res = Sym->getValue();
       return false;
     }
 
     // Otherwise create a symbol ref.
-    Res = MCSymbolRefExpr::Create(Sym, getContext());
+    Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
     return false;
   }
   case AsmToken::Integer:
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index eb046d0..345a78c 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -136,10 +136,11 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
 /// returns an empty buffer.
 MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
                                            std::string *ErrStr,
-                                           int64_t FileSize) {
+                                           int64_t FileSize,
+                                           struct stat *FileInfo) {
   if (Filename == "-")
     return getSTDIN();
-  return getFile(Filename, ErrStr, FileSize);
+  return getFile(Filename, ErrStr, FileSize, FileInfo);
 }
 
 //===----------------------------------------------------------------------===//
@@ -169,7 +170,7 @@ public:
 }
 
 MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
-                                    int64_t FileSize) {
+                                    int64_t FileSize, struct stat *FileInfo) {
   int OpenFlags = 0;
 #ifdef O_BINARY
   OpenFlags |= O_BINARY;  // Open input file in binary mode on win32.
@@ -183,15 +184,17 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
   
   // If we don't know the file size, use fstat to find out.  fstat on an open
   // file descriptor is cheaper than stat on a random path.
-  if (FileSize == -1) {
-    struct stat FileInfo;
+  if (FileSize == -1 || FileInfo) {
+    struct stat MyFileInfo;
+    struct stat *FileInfoPtr = FileInfo? FileInfo : &MyFileInfo;
+    
     // TODO: This should use fstat64 when available.
-    if (fstat(FD, &FileInfo) == -1) {
+    if (fstat(FD, FileInfoPtr) == -1) {
       if (ErrStr) *ErrStr = strerror(errno);
       ::close(FD);
       return 0;
     }
-    FileSize = FileInfo.st_size;
+    FileSize = FileInfoPtr->st_size;
   }
   
   
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index a56a1f7..b43c3af 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -12,10 +12,12 @@ add_llvm_library(LLVMSystem
   Process.cpp
   Program.cpp
   RWMutex.cpp
+  SearchForAddressOfSpecialSymbol.cpp
   Signals.cpp
   ThreadLocal.cpp
   Threading.cpp
   TimeValue.cpp
+  Valgrind.cpp
   Unix/Alarm.inc
   Unix/Host.inc
   Unix/Memory.inc
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 63baa6d..d6f3140 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -69,44 +69,8 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
   return false;
 }
 
-static void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
-#define EXPLICIT_SYMBOL(SYM) \
-   extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
-
-  // If this is darwin, it has some funky issues, try to solve them here.  Some
-  // important symbols are marked 'private external' which doesn't allow
-  // SearchForAddressOfSymbol to find them.  As such, we special case them here,
-  // there is only a small handful of them.
-
-#ifdef __APPLE__
-  {
-    EXPLICIT_SYMBOL(__ashldi3);
-    EXPLICIT_SYMBOL(__ashrdi3);
-    EXPLICIT_SYMBOL(__cmpdi2);
-    EXPLICIT_SYMBOL(__divdi3);
-    EXPLICIT_SYMBOL(__eprintf);
-    EXPLICIT_SYMBOL(__fixdfdi);
-    EXPLICIT_SYMBOL(__fixsfdi);
-    EXPLICIT_SYMBOL(__fixunsdfdi);
-    EXPLICIT_SYMBOL(__fixunssfdi);
-    EXPLICIT_SYMBOL(__floatdidf);
-    EXPLICIT_SYMBOL(__floatdisf);
-    EXPLICIT_SYMBOL(__lshrdi3);
-    EXPLICIT_SYMBOL(__moddi3);
-    EXPLICIT_SYMBOL(__udivdi3);
-    EXPLICIT_SYMBOL(__umoddi3);
-  }
-#endif
-
-#ifdef __CYGWIN__
-  {
-    EXPLICIT_SYMBOL(_alloca);
-    EXPLICIT_SYMBOL(__main);
-  }
-#endif
-
-#undef EXPLICIT_SYMBOL
-  return 0;
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
 }
 
 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
@@ -132,7 +96,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
     }
   }
 
-  if (void *Result = SearchForAddressOfSpecialSymbol(symbolName))
+  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
     return Result;
 
 // This macro returns the address of a well-known, explicit symbol
diff --git a/lib/System/Memory.cpp b/lib/System/Memory.cpp
index e2d838d..ef23b8d 100644
--- a/lib/System/Memory.cpp
+++ b/lib/System/Memory.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/System/Memory.h"
+#include "llvm/System/Valgrind.h"
 #include "llvm/Config/config.h"
 
 namespace llvm {
@@ -68,4 +69,6 @@ void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr,
 #  endif
 
 #endif  // end apple
+
+  ValgrindDiscardTranslations(Addr, Len);
 }
diff --git a/lib/System/SearchForAddressOfSpecialSymbol.cpp b/lib/System/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 0000000..73b484c
--- /dev/null
+++ b/lib/System/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,64 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file pulls the addresses of certain symbols out of the linker.  It must
+//  include as few header files as possible because it declares the symbols as
+//  void*, which would conflict with the actual symbol type if any header
+//  declared it.
+//
+//===----------------------------------------------------------------------===//
+
+#include <string.h>
+
+// Lives in the global namespace so the symbols below are declared there too.
+static void *DoSearch(const char* symbolName) {
+#define EXPLICIT_SYMBOL(SYM) \
+   extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
+
+  // Darwin marks a handful of important symbols 'private external', which
+  // keeps SearchForAddressOfSymbol from finding them.  Each one is therefore
+  // special-cased below: a name that matches returns the address of the
+  // corresponding extern symbol; anything else falls through and returns 0.
+
+#ifdef __APPLE__
+  {
+    EXPLICIT_SYMBOL(__ashldi3);
+    EXPLICIT_SYMBOL(__ashrdi3);
+    EXPLICIT_SYMBOL(__cmpdi2);
+    EXPLICIT_SYMBOL(__divdi3);
+    EXPLICIT_SYMBOL(__eprintf);
+    EXPLICIT_SYMBOL(__fixdfdi);
+    EXPLICIT_SYMBOL(__fixsfdi);
+    EXPLICIT_SYMBOL(__fixunsdfdi);
+    EXPLICIT_SYMBOL(__fixunssfdi);
+    EXPLICIT_SYMBOL(__floatdidf);
+    EXPLICIT_SYMBOL(__floatdisf);
+    EXPLICIT_SYMBOL(__lshrdi3);
+    EXPLICIT_SYMBOL(__moddi3);
+    EXPLICIT_SYMBOL(__udivdi3);
+    EXPLICIT_SYMBOL(__umoddi3);
+  }
+#endif
+
+#ifdef __CYGWIN__
+  {
+    EXPLICIT_SYMBOL(_alloca);
+    EXPLICIT_SYMBOL(__main);
+  }
+#endif
+
+#undef EXPLICIT_SYMBOL
+  return 0;
+}
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName) {
+  return DoSearch(symbolName);  // The lookup itself lives outside llvm::.
+}
+}  // namespace llvm
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index c10498a..b4cc875 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -113,8 +113,9 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
   }
 
   // Install it as the requested FD
-  if (-1 == dup2(InFD, FD)) {
+  if (dup2(InFD, FD) == -1) {
     MakeErrMsg(ErrMsg, "Cannot dup2");
+    close(InFD);
     return true;
   }
   close(InFD);      // Close the original FD
diff --git a/lib/System/Valgrind.cpp b/lib/System/Valgrind.cpp
new file mode 100644
index 0000000..c76cfe4
--- /dev/null
+++ b/lib/System/Valgrind.cpp
@@ -0,0 +1,54 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+//  defined.  If we have valgrind.h but valgrind isn't running, its macros are
+//  no-ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+  return (RUNNING_ON_VALGRIND) == 0;
+}
+
+// The sense of this flag is deliberately inverted.  Code in this file may run
+// before the flag has been initialized, in which case it reads as 0 (false);
+// storing "not under Valgrind" makes that early state fall through to the
+// Valgrind-provided macros instead of skipping them.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+  // The cached flag can only rule Valgrind out; when it does not, the
+  // Valgrind macro itself gives the answer.
+  return !NotUnderValgrind && (RUNNING_ON_VALGRIND);
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+  // Skip the request only when the cached flag says we are running natively.
+  if (!NotUnderValgrind) {
+    VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+  }
+}
+
+#else  // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+  return false;  // Built without valgrind.h, so there is nothing to query.
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}  // Deliberately a no-op in builds without valgrind.h.
+
+#endif  // !HAVE_VALGRIND_VALGRIND_H
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index b5f6374..5a0052f 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -126,7 +126,7 @@ Path::isValid() const {
 }
 
 void Path::makeAbsolute() {
-  TCHAR  FullPath[MAX_PATH + 1] = {0}; 
+  TCHAR  FullPath[MAX_PATH + 1] = {0};
   LPTSTR FilePart = NULL;
 
   DWORD RetLength = ::GetFullPathNameA(path.c_str(),
@@ -161,7 +161,7 @@ Path::isAbsolute(const char *NameStart, unsigned NameLen) {
   }
 }
 
-bool 
+bool
 Path::isAbsolute() const {
   // FIXME: This does not handle correctly an absolute path starting from
   // a drive letter or in UNC format.
@@ -174,9 +174,9 @@ Path::isAbsolute() const {
     default:
       return path[0] == '/' || (path[1] == ':' && path[2] == '/');
   }
-} 
+}
 
-static Path *TempDirectory = NULL;
+static Path *TempDirectory;
 
 Path
 Path::GetTemporaryDirectory(std::string* ErrMsg) {
@@ -266,7 +266,7 @@ Path
 Path::GetCurrentDirectory() {
   char pathname[MAX_PATH];
   ::GetCurrentDirectoryA(MAX_PATH,pathname);
-  return Path(pathname);  
+  return Path(pathname);
 }
 
 /// GetMainExecutable - Return the path to the main executable, given the
@@ -448,7 +448,7 @@ Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
     MakeErrMsg(ErrMsg, path + ": can't get status of file");
     return true;
   }
-    
+
   if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
     if (ErrMsg)
       *ErrMsg = path + ": not a directory";
@@ -617,7 +617,7 @@ Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) {
       *next = 0;
       if (!CreateDirectory(pathname, NULL) &&
           GetLastError() != ERROR_ALREADY_EXISTS)
-          return MakeErrMsg(ErrMsg, 
+          return MakeErrMsg(ErrMsg,
             std::string(pathname) + ": Can't create directory: ");
       *next++ = '/';
     }
@@ -649,7 +649,7 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
   WIN32_FILE_ATTRIBUTE_DATA fi;
   if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
     return true;
-    
+
   if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
     // If it doesn't exist, we're done.
     if (!exists())
@@ -706,7 +706,7 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
 
     pathname[lastchar] = 0;
     if (!RemoveDirectory(pathname))
-      return MakeErrMsg(ErrStr, 
+      return MakeErrMsg(ErrStr,
         std::string(pathname) + ": Can't destroy directory: ");
     return false;
   } else {
@@ -753,7 +753,7 @@ bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
 bool
 Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
   if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING))
-    return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path 
+    return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path
         + "': ");
   return false;
 }
@@ -764,7 +764,7 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
   if (!si.isFile) {
     return true;
   }
-  
+
   HANDLE h = CreateFile(path.c_str(),
                         FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES,
                         FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 7033861..bbb1dbd 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -40,6 +40,8 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
                                    "Enable NEON instructions">;
 def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
                                      "Enable Thumb2 instructions">;
+def FeatureFP16   : SubtargetFeature<"fp16", "HasFP16", "true",
+                                     "Enable half-precision floating point">;
 
 //===----------------------------------------------------------------------===//
 // ARM Processors supported.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0194231..767d5ec 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -58,12 +58,13 @@ namespace ARMII {
     Size4Bytes    = 3,
     Size2Bytes    = 4,
 
-    // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load
-    // and store ops
+    // IndexMode - Unindexed, pre-indexed, or post-indexed are valid for load
+    // and store ops only.  Generic "updating" flag is used for ld/st multiple.
     IndexModeShift = 7,
     IndexModeMask  = 3 << IndexModeShift,
     IndexModePre   = 1,
     IndexModePost  = 2,
+    IndexModeUpd   = 3,
 
     //===------------------------------------------------------------------===//
     // Instruction encoding formats.
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index d9b8323..11e1c48 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -590,6 +590,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
       AFI->isThumb2Function())
     MF.getRegInfo().setPhysRegUsed(ARM::R4);
 
+  // Spill LR if Thumb1 function uses variable length argument lists.
+  if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
+    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+
   // Don't spill FP if the frame can be eliminated. This is determined
   // by scanning the callee-save registers to see if any is used.
   const unsigned *CSRegs = getCalleeSavedRegs();
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 108a244..334c820 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -51,6 +51,7 @@ namespace {
     const ARMSubtarget        *Subtarget;
     TargetMachine             &TM;
     JITCodeEmitter            &MCE;
+    MachineModuleInfo *MMI;
     const std::vector<MachineConstantPoolEntry> *MCPEs;
     const std::vector<MachineJumpTableEntry> *MJTEs;
     bool IsPIC;
@@ -182,7 +183,8 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
   if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
   IsPIC = TM.getRelocationModel() == Reloc::PIC_;
   JTI->Initialize(MF, IsPIC);
-  MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+  MMI = &getAnalysis<MachineModuleInfo>();
+  MCE.setModuleInfo(MMI);
 
   do {
     DEBUG(errs() << "JITTing function '"
@@ -436,8 +438,8 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
 void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
   const MachineOperand &MO0 = MI.getOperand(0);
   const MachineOperand &MO1 = MI.getOperand(1);
-  assert(MO1.isImm() && ARM_AM::getSOImmVal(MO1.isImm()) != -1 &&
-                                            "Not a valid so_imm value!");
+  assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+                                                  "Not a valid so_imm value!");
   unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
   unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
 
@@ -563,7 +565,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
   }
   case TargetOpcode::DBG_LABEL:
   case TargetOpcode::EH_LABEL:
-    MCE.emitLabel(MI.getOperand(0).getImm());
+    MCE.emitLabel(MI.getOperand(0).getMCSymbol());
     break;
   case TargetOpcode::IMPLICIT_DEF:
   case TargetOpcode::KILL:
@@ -925,19 +927,26 @@ static unsigned getAddrModeUPBits(unsigned Mode) {
   return Binary;
 }
 
-void ARMCodeEmitter::emitLoadStoreMultipleInstruction(
-                                                       const MachineInstr &MI) {
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
 
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
+  // Skip operand 0 of an instruction with base register update.
+  unsigned OpIdx = 0;
+  if (IsUpdating)
+    ++OpIdx;
+
   // Set base address operand
-  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // Set addressing mode by modifying bits U(23) and P(24)
-  const MachineOperand &MO = MI.getOperand(1);
+  const MachineOperand &MO = MI.getOperand(OpIdx++);
   Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
 
   // Set bit W(21)
@@ -945,7 +954,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(
     Binary |= 0x1 << ARMII::W_BitShift;
 
   // Set registers
-  for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) {
+  for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || MO.isImplicit())
       break;
@@ -1322,17 +1331,25 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
 
 void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
                                                        const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
   // Part of binary is determined by TableGn.
   unsigned Binary = getBinaryCodeForInstr(MI);
 
   // Set the conditional execution predicate
   Binary |= II->getPredicate(&MI) << ARMII::CondShift;
 
+  // Skip operand 0 of an instruction with base register update.
+  unsigned OpIdx = 0;
+  if (IsUpdating)
+    ++OpIdx;
+
   // Set base address operand
-  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift;
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
 
   // Set addressing mode by modifying bits U(23) and P(24)
-  const MachineOperand &MO = MI.getOperand(1);
+  const MachineOperand &MO = MI.getOperand(OpIdx++);
   Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
 
   // Set bit W(21)
@@ -1340,11 +1357,11 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(
     Binary |= 0x1 << ARMII::W_BitShift;
 
   // First register is encoded in Dd.
-  Binary |= encodeVFPRd(MI, 5);
+  Binary |= encodeVFPRd(MI, OpIdx+2);
 
   // Number of registers are encoded in offset field.
   unsigned NumRegs = 1;
-  for (unsigned i = 6, e = MI.getNumOperands(); i != e; ++i) {
+  for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
     if (!MO.isReg() || MO.isImplicit())
       break;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 8fa3c04..1c5bd42 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1473,11 +1473,10 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
   bool MadeChange = false;
   for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
     MachineInstr *MI = PushPopMIs[i];
-    // First two operands are predicates, the third is a zero since there
-    // is no writeback.
+    // First two operands are predicates.
     if (MI->getOpcode() == ARM::tPOP_RET &&
-        MI->getOperand(3).getReg() == ARM::PC &&
-        MI->getNumExplicitOperands() == 4) {
+        MI->getOperand(2).getReg() == ARM::PC &&
+        MI->getNumExplicitOperands() == 3) {
       BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
       MI->eraseFromParent();
       MadeChange = true;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 3dd0313..8f20843 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -130,7 +130,7 @@ void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
 
 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
-    return new ARMMachOTargetObjectFile();
+    return new TargetLoweringObjectFileMachO();
 
   return new ARMElfTargetObjectFile();
 }
@@ -426,12 +426,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
 
-  // int <-> fp are custom expanded into bit_convert + ARMISD ops.
-  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
-    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
-    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+  // Custom lowerings that require VFP support.
+  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
+    if (Subtarget->hasVFP2()) {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+    }
+    // Special handling for half-precision FP.
+    if (Subtarget->hasVFP3() && Subtarget->hasFP16()) {
+      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom);
+      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom);
+    }
   }
 
   // We have target-specific dag combine patterns for the following nodes:
@@ -491,6 +499,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
   case ARMISD::SITOF:         return "ARMISD::SITOF";
   case ARMISD::UITOF:         return "ARMISD::UITOF";
+  case ARMISD::F16_TO_F32:    return "ARMISD::F16_TO_F32";
+  case ARMISD::F32_TO_F16:    return "ARMISD::F32_TO_F16";
 
   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
@@ -1972,8 +1982,21 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) {
 
 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
-  unsigned Opc =
-    Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI;
+  unsigned Opc;
+
+  switch (Op.getOpcode()) {
+  default:
+    assert(0 && "Invalid opcode!");
+  case ISD::FP32_TO_FP16:
+    Opc = ARMISD::F32_TO_F16;
+    break;
+  case ISD::FP_TO_SINT:
+    Opc = ARMISD::FTOSI;
+    break;
+  case ISD::FP_TO_UINT:
+    Opc = ARMISD::FTOUI;
+    break;
+  }
   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
   return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
 }
@@ -1981,8 +2004,21 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();
-  unsigned Opc =
-    Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF;
+  unsigned Opc;
+
+  switch (Op.getOpcode()) {
+  default:
+    assert(0 && "Invalid opcode!");
+  case ISD::FP16_TO_FP32:
+    Opc = ARMISD::F16_TO_F32;
+    break;
+  case ISD::SINT_TO_FP:
+    Opc = ARMISD::SITOF;
+    break;
+  case ISD::UINT_TO_FP:
+    Opc = ARMISD::UITOF;
+    break;
+  }
 
   Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
   return DAG.getNode(Opc, dl, VT, Op);
@@ -3042,8 +3078,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG, VarArgsFrameIndex);
   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
+  case ISD::FP16_TO_FP32:
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
+  case ISD::FP32_TO_FP16:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
@@ -4411,6 +4449,9 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
       break;
     }
   }
+  if (StringRef("{cc}").equals_lower(Constraint))
+    return std::make_pair(0U, ARM::CCRRegisterClass);
+
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index f8f8adc..d7b2ba3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -59,6 +59,8 @@ namespace llvm {
       FTOUI,        // FP to uint within a FP register.
       SITOF,        // sint to FP within a FP register.
       UITOF,        // uint to FP within a FP register.
+      F16_TO_F32,   // Half FP to single FP within a FP register.
+      F32_TO_F16,   // Single FP to half FP within a FP register.
 
       SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
       SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 76595fa..258a96b 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -112,6 +112,7 @@ class IndexMode<bits<2> val> {
 def IndexModeNone : IndexMode<0>;
 def IndexModePre  : IndexMode<1>;
 def IndexModePost : IndexMode<2>;
+def IndexModeUpd  : IndexMode<3>;
 
 // Instruction execution domain.
 class Domain<bits<2> val> {
@@ -144,6 +145,23 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
   let PrintMethod = "printSBitModifierOperand";
 }
 
+// ARM special operands for disassembly only.
+//
+
+def cps_opt : Operand<i32> {
+  let PrintMethod = "printCPSOptionOperand";
+}
+
+def msr_mask : Operand<i32> {
+  let PrintMethod = "printMSRMaskOperand";
+}
+
+// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
+// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
+def neg_zero : Operand<i32> {
+  let PrintMethod = "printNegZeroOperand";
+}
+
 //===----------------------------------------------------------------------===//
 
 // ARM Instruction templates.
@@ -835,18 +853,18 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
 
 
 // addrmode4 instructions
-class AXI4ld<dag oops, dag iops, Format f, InstrItinClass itin,
-             string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
+class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+             string asm, string cstr, list<dag> pattern>
+  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
+       asm, cstr, pattern> {
   let Inst{20}    = 1; // L bit
   let Inst{22}    = 0; // S bit
   let Inst{27-25} = 0b100;
 }
-class AXI4st<dag oops, dag iops, Format f, InstrItinClass itin,
-             string asm, list<dag> pattern>
-  : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, itin,
-       asm, "", pattern> {
+class AXI4st<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+             string asm, string cstr, list<dag> pattern>
+  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin,
+       asm, cstr, pattern> {
   let Inst{20}    = 0; // L bit
   let Inst{22}    = 0; // S bit
   let Inst{27-25} = 0b100;
@@ -980,9 +998,9 @@ class T1JTI<dag oops, dag iops, InstrItinClass itin,
 
 // Two-address instructions
 class T1It<dag oops, dag iops, InstrItinClass itin,
-           string asm, list<dag> pattern>
+           string asm, string cstr, list<dag> pattern>
   : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, 
-            asm, "$lhs = $dst", pattern>;
+            asm, cstr, pattern>;
 
 // Thumb1 instruction that can either be predicated or set CPSR.
 class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1179,6 +1197,10 @@ class T2Ix2<dag oops, dag iops, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
 
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+            string asm, string cstr, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>;
 
 // T2Iidxldst - Thumb2 indexed load / store instructions.
 class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
@@ -1293,10 +1315,10 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
 }
 
 // Load / store multiple
-class AXDI5<dag oops, dag iops, InstrItinClass itin,
-            string asm, list<dag> pattern>
-  : VFPXI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
-       VFPLdStMulFrm, itin, asm, "", pattern> {
+class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+            string asm, string cstr, list<dag> pattern>
+  : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+       VFPLdStMulFrm, itin, asm, cstr, pattern> {
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
   let Inst{11-8}  = 0b1011;
@@ -1305,10 +1327,10 @@ class AXDI5<dag oops, dag iops, InstrItinClass itin,
   let Dom = VFPNeonDomain.Value;
 }
 
-class AXSI5<dag oops, dag iops, InstrItinClass itin,
-            string asm, list<dag> pattern>
-  : VFPXI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
-       VFPLdStMulFrm, itin, asm, "", pattern> {
+class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+            string asm, string cstr, list<dag> pattern>
+  : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+       VFPLdStMulFrm, itin, asm, cstr, pattern> {
   // TODO: Mark the instructions with the appropriate subtarget info.
   let Inst{27-25} = 0b110;
   let Inst{11-8}  = 0b1010;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index af82a5f..3fc37da 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -378,7 +378,7 @@ def am3offset : Operand<i32>,
 def addrmode4 : Operand<i32>,
                 ComplexPattern<i32, 2, "SelectAddrMode4", []> {
   let PrintMethod = "printAddrMode4Operand";
-  let MIOperandInfo = (ops GPR, i32imm);
+  let MIOperandInfo = (ops GPR:$addr, i32imm);
 }
 
 // addrmode5 := reg +/- imm8*4
@@ -386,7 +386,7 @@ def addrmode4 : Operand<i32>,
 def addrmode5 : Operand<i32>,
                 ComplexPattern<i32, 2, "SelectAddrMode5", []> {
   let PrintMethod = "printAddrMode5Operand";
-  let MIOperandInfo = (ops GPR, i32imm);
+  let MIOperandInfo = (ops GPR:$base, i32imm);
 }
 
 // addrmode6 := reg with optional writeback
@@ -710,7 +710,7 @@ def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
 // opt{5} = changemode from Inst{17}
 // opt{8-6} = AIF from Inst{8-6}
 // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}",
+def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
               [/* For disassembly only; pattern left blank */]>,
           Requires<[IsARM]> {
   let Inst{31-28} = 0b1111;
@@ -721,9 +721,12 @@ def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}",
 
 // Preload signals the memory system of possible future data/instruction access.
 // These are for disassembly only.
+//
+// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
+// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
 multiclass APreLoad<bit data, bit read, string opc> {
 
-  def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary,
+  def i : AXI<(outs), (ins GPR:$base, neg_zero:$imm), MiscFrm, NoItinerary,
                !strconcat(opc, "\t[$base, $imm]"), []> {
     let Inst{31-26} = 0b111101;
     let Inst{25} = 0; // 0 for immediate form
@@ -903,10 +906,11 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1 in
-  def LDM_RET : AXI4ld<(outs),
-                    (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-                    LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb",
-                    []>;
+  def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                        reglist:$dsts, variable_ops),
+                       IndexModeUpd, LdStMulFrm, IIC_Br,
+                       "ldm${addr:submode}${p}\t$addr, $dsts",
+                       "$addr.addr = $wb", []>;
 
 // On non-Darwin platforms R9 is callee-saved.
 let isCall = 1,
@@ -1341,17 +1345,31 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-def LDM : AXI4ld<(outs),
-               (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-               LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb",
-               []>;
-
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-def STM : AXI4st<(outs),
-               (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-               LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb",
-               []>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
+                          reglist:$dsts, variable_ops),
+                 IndexModeNone, LdStMulFrm, IIC_iLoadm,
+                 "ldm${addr:submode}${p}\t$addr, $dsts", "", []>;
+
+def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                      reglist:$dsts, variable_ops),
+                     IndexModeUpd, LdStMulFrm, IIC_iLoadm,
+                     "ldm${addr:submode}${p}\t$addr, $dsts",
+                     "$addr.addr = $wb", []>;
+} // mayLoad, hasExtraDefRegAllocReq
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
+                          reglist:$srcs, variable_ops),
+                 IndexModeNone, LdStMulFrm, IIC_iStorem,
+                 "stm${addr:submode}${p}\t$addr, $srcs", "", []>;
+
+def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                      reglist:$srcs, variable_ops),
+                     IndexModeUpd, LdStMulFrm, IIC_iStorem,
+                     "stm${addr:submode}${p}\t$addr, $srcs",
+                     "$addr.addr = $wb", []>;
+} // mayStore, hasExtraSrcRegAllocReq
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
@@ -2844,29 +2862,29 @@ def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr",
   let Inst{7-4} = 0b0000;
 }
 
-def MSR : ABI<0b0001, (outs), (ins GPR:$src, i32imm:$mask), NoItinerary, "msr",
-              "\tcpsr${mask:msr}, $src",
+def MSR : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
+              "msr", "\tcpsr$mask, $src",
               [/* For disassembly only; pattern left blank */]> {
   let Inst{23-20} = 0b0010;
   let Inst{7-4} = 0b0000;
 }
 
-def MSRi : ABI<0b0011, (outs), (ins so_imm:$a, i32imm:$mask), NoItinerary,"msr",
-              "\tcpsr${mask:msr}, $a",
+def MSRi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
+              "msr", "\tcpsr$mask, $a",
               [/* For disassembly only; pattern left blank */]> {
   let Inst{23-20} = 0b0010;
   let Inst{7-4} = 0b0000;
 }
 
-def MSRsys : ABI<0b0001, (outs), (ins GPR:$src, i32imm:$mask),NoItinerary,"msr",
-              "\tspsr${mask:msr}, $src",
+def MSRsys : ABI<0b0001, (outs), (ins GPR:$src, msr_mask:$mask), NoItinerary,
+              "msr", "\tspsr$mask, $src",
               [/* For disassembly only; pattern left blank */]> {
   let Inst{23-20} = 0b0110;
   let Inst{7-4} = 0b0000;
 }
 
-def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a, i32imm:$mask),NoItinerary,"msr",
-              "\tspsr${mask:msr}, $a",
+def MSRsysi : ABI<0b0011, (outs), (ins so_imm:$a, msr_mask:$mask), NoItinerary,
+              "msr", "\tspsr$mask, $a",
               [/* For disassembly only; pattern left blank */]> {
   let Inst{23-20} = 0b0110;
   let Inst{7-4} = 0b0000;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7c44a2f..8fee6fa 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -98,16 +98,6 @@ def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
 // NEON operand definitions
 //===----------------------------------------------------------------------===//
 
-// addrmode_neonldstm := reg
-//
-/* TODO: Take advantage of vldm.
-def addrmode_neonldstm : Operand<i32>,
-                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
-  let PrintMethod = "printAddrNeonLdStMOperand";
-  let MIOperandInfo = (ops GPR, i32imm);
-}
-*/
-
 def h8imm  : Operand<i8> {
   let PrintMethod = "printHex8ImmOperand";
 }
@@ -125,26 +115,6 @@ def h64imm : Operand<i64> {
 // NEON load / store instructions
 //===----------------------------------------------------------------------===//
 
-/* TODO: Take advantage of vldm.
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : NI<(outs),
-               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
-               IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
-  let Inst{27-25} = 0b110;
-  let Inst{20}    = 1;
-  let Inst{11-9}  = 0b101;
-}
-
-def VLDMS : NI<(outs),
-               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
-               IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
-  let Inst{27-25} = 0b110;
-  let Inst{20}    = 1;
-  let Inst{11-9}  = 0b101;
-}
-}
-*/
-
 // Use vldmia to load a Q register as a D register pair.
 def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
                 "vldmia", "$addr, ${dst:dregpair}",
@@ -2738,7 +2708,7 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
 
 def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
           (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
-def : Pat<(v2f64 (scalar_to_vector DPR:$src)),
+def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
           (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
 def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
           (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 786dd65..37c9fc5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -201,7 +201,7 @@ def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
 //
 // The opt{4-0} and opt{5} sub-fields are to accommodate 32-bit Thumb and ARM
 // CPS which has more options.
-def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+def tCPS : T1I<(outs), (ins cps_opt:$opt), NoItinerary, "cps$opt",
               [/* For disassembly only; pattern left blank */]>,
            T1Misc<0b0110011>;
 
@@ -290,8 +290,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
 // FIXME: remove when we have a way to marking a MI with these properties.
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
-                   "pop${p}\t$wb", []>,
+def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
+                   "pop${p}\t$dsts", []>,
                T1Misc<{1,1,0,?,?,?,?}>;
 
 let isCall = 1,
@@ -539,28 +539,37 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
 //
 
 // These requires base address to be written back or one of the loaded regs.
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
 def tLDM : T1I<(outs),
-               (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
+               (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
                IIC_iLoadm,
-               "ldm${addr:submode}${p}\t$addr, $wb", []>,
+               "ldm${addr:submode}${p}\t$addr, $dsts", []>,
            T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
 
+def tLDM_UPD : T1It<(outs tGPR:$wb),
+                    (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
+                    IIC_iLoadm,
+                    "ldm${addr:submode}${p}\t$addr, $dsts",
+                    "$addr.addr = $wb", []>,
+               T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
+} // mayLoad, hasExtraDefRegAllocReq
+
 let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-def tSTM : T1I<(outs),
-               (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-               IIC_iStorem,
-               "stm${addr:submode}${p}\t$addr, $wb", []>,
+def tSTM_UPD : T1It<(outs tGPR:$wb),
+                    (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
+                    IIC_iStorem,
+                    "stm${addr:submode}${p}\t$addr, $srcs",
+                    "$addr.addr = $wb", []>,
            T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189
 
 let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in
-def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
-               "pop${p}\t$wb", []>,
+def tPOP : T1I<(outs), (ins pred:$p, reglist:$dsts, variable_ops), IIC_Br,
+               "pop${p}\t$dsts", []>,
            T1Misc<{1,1,0,?,?,?,?}>;
 
 let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in
-def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br,
-                "push${p}\t$wb", []>,
+def tPUSH : T1I<(outs), (ins pred:$p, reglist:$srcs, variable_ops), IIC_Br,
+                "push${p}\t$srcs", []>,
             T1Misc<{0,1,0,?,?,?,?}>;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 2fc7d2f..ab9e926 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -150,6 +150,10 @@ def t2addrmode_imm8s4 : Operand<i32>,
   let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
 }
 
+def t2am_imm8s4_offset : Operand<i32> {
+  let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
+}
+
 // t2addrmode_so_reg  := reg + (reg << imm2)
 def t2addrmode_so_reg : Operand<i32>,
                         ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
@@ -903,7 +907,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
 def t2LDRDi8  : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
                         (ins t2addrmode_imm8s4:$addr),
                         IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<?, ?, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
                         (ins i32imm:$addr), IIC_iLoadi,
                        "ldrd", "\t$dst1, $addr", []> {
   let Inst{19-16} = 0b1111; // Rn
@@ -1105,14 +1109,34 @@ def t2STRT   : T2IstT<0b10, "strt">;
 def t2STRBT  : T2IstT<0b00, "strbt">;
 def t2STRHT  : T2IstT<0b01, "strht">;
 
-// FIXME: ldrd / strd pre / post variants
+// ldrd / strd pre / post variants
+// For disassembly only.
+
+def t2LDRD_PRE  : T2Ii8s4<1, 1, 1, (outs GPR:$dst1, GPR:$dst2),
+                 (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
+                 "ldrd", "\t$dst1, $dst2, [$base, $imm]!", []>;
+
+def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$dst1, GPR:$dst2),
+                 (ins GPR:$base, t2am_imm8s4_offset:$imm), NoItinerary,
+                 "ldrd", "\t$dst1, $dst2, [$base], $imm", []>;
+
+def t2STRD_PRE  : T2Ii8s4<1, 1, 0, (outs),
+                 (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 NoItinerary, "strd", "\t$src1, $src2, [$base, $imm]!", []>;
+
+def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
+                 (ins GPR:$src1, GPR:$src2, GPR:$base, t2am_imm8s4_offset:$imm),
+                 NoItinerary, "strd", "\t$src1, $src2, [$base], $imm", []>;
 
 // T2Ipl (Preload Data/Instruction) signals the memory system of possible future
 // data/instruction access.  These are for disassembly only.
+//
+// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
+// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
 multiclass T2Ipl<bit instr, bit write, string opc> {
 
-  def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc,
-                "\t$addr", []> {
+  def i12 : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
+                "\t[$base, $imm]", []> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
     let Inst{23} = 1; // U = 1
@@ -1122,8 +1146,8 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
     let Inst{15-12} = 0b1111;
   }
 
-  def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc,
-                "\t$addr", []> {
+  def i8 : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
+                "\t[$base, $imm]", []> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
     let Inst{23} = 0; // U = 0
@@ -1134,9 +1158,8 @@ multiclass T2Ipl<bit instr, bit write, string opc> {
     let Inst{11-8} = 0b1100;
   }
 
-  // A8.6.118 #0 and #-0 differs.  Translates -0 to -1, -1 to -2, ..., etc.
-  def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc,
-                "\t[pc, ${imm:negzero}]", []> {
+  def pci : T2I<(outs), (ins GPR:$base, neg_zero:$imm), IIC_iLoadi, opc,
+                "\t[pc, $imm]", []> {
     let Inst{31-25} = 0b1111100;
     let Inst{24} = instr;
     let Inst{23} = ?; // add = (U == 1)
@@ -1181,29 +1204,56 @@ defm t2PLI  : T2Ipl<1, 0, "pli">;
 //  Load / store multiple Instructions.
 //
 
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-def t2LDM : T2XI<(outs),
-                 (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-             IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
+                          reglist:$dsts, variable_ops), IIC_iLoadm,
+                 "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
+  let Inst{31-27} = 0b11101;
+  let Inst{26-25} = 0b00;
+  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
+  let Inst{22} = 0;
+  let Inst{21} = 0; // The W bit.
+  let Inst{20} = 1; // Load
+}
+
+def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                       reglist:$dsts, variable_ops), IIC_iLoadm,
+                      "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+                      "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
   let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
   let Inst{22} = 0;
-  let Inst{21} = ?; // The W bit.
+  let Inst{21} = 1; // The W bit.
   let Inst{20} = 1; // Load
 }
+} // mayLoad, hasExtraDefRegAllocReq
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
+                          reglist:$srcs, variable_ops), IIC_iStorem,
+                 "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
+  let Inst{31-27} = 0b11101;
+  let Inst{26-25} = 0b00;
+  let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
+  let Inst{22} = 0;
+  let Inst{21} = 0; // The W bit.
+  let Inst{20} = 0; // Store
+}
 
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-def t2STM : T2XI<(outs),
-                 (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-            IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                       reglist:$srcs, variable_ops),
+                      IIC_iStorem,
+                      "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs",
+                      "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
   let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
   let Inst{22} = 0;
-  let Inst{21} = ?; // The W bit.
+  let Inst{21} = 1; // The W bit.
   let Inst{20} = 0; // Store
 }
+} // mayStore, hasExtraSrcRegAllocReq
 
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
@@ -2136,7 +2186,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
 // memory barriers protect the atomic sequences
 let hasSideEffects = 1 in {
 def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
-                        Pseudo, NoItinerary,
+                        ThumbFrm, NoItinerary,
                         "dmb", "",
                         [(ARMMemBarrierV7)]>,
                         Requires<[IsThumb2]> {
@@ -2146,7 +2196,7 @@ def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
 }
 
 def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
-                        Pseudo, NoItinerary,
+                        ThumbFrm, NoItinerary,
                         "dsb", "",
                         [(ARMSyncBarrierV7)]>,
                         Requires<[IsThumb2]> {
@@ -2351,15 +2401,15 @@ let Defs =
 // FIXME: Should pc be an implicit operand like PICADD, etc?
 let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
     hasExtraDefRegAllocReq = 1 in
-  def t2LDM_RET : T2XI<(outs),
-                    (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
-                    IIC_Br, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb",
-                    []> {
+  def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
+                                         reglist:$dsts, variable_ops), IIC_Br,
+                        "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+                        "$addr.addr = $wb", []> {
   let Inst{31-27} = 0b11101;
   let Inst{26-25} = 0b00;
   let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
   let Inst{22} = 0;
-  let Inst{21} = ?; // The W bit.
+  let Inst{21} = 1; // The W bit.
   let Inst{20} = 1; // Load
 }
 
@@ -2469,7 +2519,7 @@ def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
 // opt{5} = changemode from Inst{17}
 // opt{8-6} = AIF from Inst{8-6}
 // opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
-def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+def t2CPS : T2XI<(outs),(ins cps_opt:$opt), NoItinerary, "cps$opt",
                  [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{26} = 0;
@@ -2638,8 +2688,8 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
 }
 
 // Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins GPR:$src, i32imm:$mask), NoItinerary, "msr",
-                "\tcpsr${mask:msr}, $src",
+def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+                "\tcpsr$mask, $src",
                 [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{26} = 0;
@@ -2650,8 +2700,8 @@ def t2MSR : T2I<(outs), (ins GPR:$src, i32imm:$mask), NoItinerary, "msr",
 }
 
 // Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins GPR:$src, i32imm:$mask), NoItinerary, "msr",
-                   "\tspsr${mask:msr}, $src",
+def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+                   "\tspsr$mask, $src",
                    [/* For disassembly only; pattern left blank */]> {
   let Inst{31-27} = 0b11110;
   let Inst{26} = 0;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index cad24c4..4d1d48a 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -25,6 +25,8 @@ def arm_ftoui  : SDNode<"ARMISD::FTOUI",  SDT_FTOI>;
 def arm_ftosi  : SDNode<"ARMISD::FTOSI",  SDT_FTOI>;
 def arm_sitof  : SDNode<"ARMISD::SITOF",  SDT_ITOF>;
 def arm_uitof  : SDNode<"ARMISD::UITOF",  SDT_ITOF>;
+def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>;
+def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>;
 def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
 def arm_cmpfp  : SDNode<"ARMISD::CMPFP",  SDT_ARMCmp, [SDNPOutFlag]>;
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
@@ -77,33 +79,61 @@ def VSTRS  : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
 //
 
 let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
-                           variable_ops), IIC_fpLoadm,
-                  "vldm${addr:submode}${p}\t${addr:base}, $wb",
-                  []> {
+def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+                           variable_ops), IndexModeNone, IIC_fpLoadm,
+                  "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
   let Inst{20} = 1;
 }
 
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
-                           variable_ops), IIC_fpLoadm, 
-                  "vldm${addr:submode}${p}\t${addr:base}, $wb",
-                  []> {
+def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+                           variable_ops), IndexModeNone, IIC_fpLoadm,
+                  "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+  let Inst{20} = 1;
+}
+
+def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+                                       reglist:$dsts, variable_ops),
+                      IndexModeUpd, IIC_fpLoadm,
+                      "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+                      "$addr.base = $wb", []> {
+  let Inst{20} = 1;
+}
+
+def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+                                       reglist:$dsts, variable_ops),
+                      IndexModeUpd, IIC_fpLoadm, 
+                      "vldm${addr:submode}${p}\t${addr:base}, $dsts",
+                      "$addr.base = $wb", []> {
   let Inst{20} = 1;
 }
 } // mayLoad, hasExtraDefRegAllocReq
 
 let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
-                           variable_ops), IIC_fpStorem,
-                 "vstm${addr:submode}${p}\t${addr:base}, $wb",
-                 []> {
+def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+                           variable_ops), IndexModeNone, IIC_fpStorem,
+                  "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+  let Inst{20} = 0;
+}
+
+def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+                           variable_ops), IndexModeNone, IIC_fpStorem,
+                  "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
   let Inst{20} = 0;
 }
 
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
-                           variable_ops), IIC_fpStorem,
-                 "vstm${addr:submode}${p}\t${addr:base}, $wb",
-                 []> {
+def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+                                       reglist:$srcs, variable_ops),
+                      IndexModeUpd, IIC_fpStorem,
+                      "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+                      "$addr.base = $wb", []> {
+  let Inst{20} = 0;
+}
+
+def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+                                       reglist:$srcs, variable_ops),
+                      IndexModeUpd, IIC_fpStorem,
+                      "vstm${addr:submode}${p}\t${addr:base}, $srcs",
+                      "$addr.base = $wb", []> {
   let Inst{20} = 0;
 }
 } // mayStore, hasExtraSrcRegAllocReq
@@ -229,11 +259,11 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
 
 def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>;
 
 def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>;
 
 def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
                  /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 19f1e3b..8fbcf45 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -243,8 +243,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
     BaseKill = true;  // New base is always killed right its use.
   }
 
-  bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
-  bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
+  bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
+  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
+                Opcode == ARM::VLDRD);
   Opcode = getLoadStoreMultipleOpcode(Opcode);
   MachineInstrBuilder MIB = (isAM4)
     ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
@@ -254,7 +255,6 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
         .addReg(Base, getKillRegState(BaseKill))
         .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs))
         .addImm(Pred).addReg(PredReg);
-  MIB.addReg(0); // Add optional writeback (0 for now).
   for (unsigned i = 0; i != NumRegs; ++i)
     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                      | getKillRegState(Regs[i].second));
@@ -443,7 +443,7 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
   case ARM::STM:
   case ARM::t2LDM:
   case ARM::t2STM:
-    return (MI->getNumOperands() - 5) * 4;
+    return (MI->getNumOperands() - 4) * 4;
   case ARM::VLDMS:
   case ARM::VSTMS:
   case ARM::VLDMD:
@@ -452,6 +452,21 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
   }
 }
 
+static unsigned getUpdatingLSMultipleOpcode(unsigned Opc) {
+  switch (Opc) {
+  case ARM::LDM: return ARM::LDM_UPD;
+  case ARM::STM: return ARM::STM_UPD;
+  case ARM::t2LDM: return ARM::t2LDM_UPD;
+  case ARM::t2STM: return ARM::t2STM_UPD;
+  case ARM::VLDMS: return ARM::VLDMS_UPD;
+  case ARM::VLDMD: return ARM::VLDMD_UPD;
+  case ARM::VSTMS: return ARM::VSTMS_UPD;
+  case ARM::VSTMD: return ARM::VSTMD_UPD;
+  default: llvm_unreachable("Unhandled opcode!");
+  }
+  return 0;
+}
+
 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
 ///
@@ -470,117 +485,119 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                                MachineBasicBlock::iterator &I) {
   MachineInstr *MI = MBBI;
   unsigned Base = MI->getOperand(0).getReg();
+  bool BaseKill = MI->getOperand(0).isKill();
   unsigned Bytes = getLSMultipleTransferSize(MI);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
   int Opcode = MI->getOpcode();
-  bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
-    Opcode == ARM::STM || Opcode == ARM::t2STM;
+  DebugLoc dl = MI->getDebugLoc();
+  bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
+                Opcode == ARM::STM || Opcode == ARM::t2STM);
 
-  if (isAM4) {
-    if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()))
-      return false;
+  bool DoMerge = false;
+  ARM_AM::AMSubMode Mode = ARM_AM::ia;
+  unsigned Offset = 0;
 
-    // Can't use the updating AM4 sub-mode if the base register is also a dest
+  if (isAM4) {
+    // Can't use an updating ld/st if the base register is also a dest
     // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
     for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
       if (MI->getOperand(i).getReg() == Base)
         return false;
     }
+    assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm()));
+    Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+  } else {
+    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
+    assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()));
+    Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
+    Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+  }
 
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
-    if (MBBI != MBB.begin()) {
-      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+  // Try merging with the previous instruction.
+  if (MBBI != MBB.begin()) {
+    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    if (isAM4) {
       if (Mode == ARM_AM::ia &&
           isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true));
-        MI->getOperand(4).setReg(Base);
-        MI->getOperand(4).setIsDef();
-        MBB.erase(PrevMBBI);
-        return true;
-      } else if (Mode == ARM_AM::ib &&
+        DoMerge = true;
+        Mode = ARM_AM::db;
+      } else if (isAM4 && Mode == ARM_AM::ib &&
                  isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true));
-        MI->getOperand(4).setReg(Base);  // WB to base
-        MI->getOperand(4).setIsDef();
-        MBB.erase(PrevMBBI);
-        return true;
+        DoMerge = true;
+        Mode = ARM_AM::da;
+      }
+    } else {
+      if (Mode == ARM_AM::ia &&
+          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+        Mode = ARM_AM::db;
+        DoMerge = true;
       }
     }
+    if (DoMerge)
+      MBB.erase(PrevMBBI);
+  }
 
-    if (MBBI != MBB.end()) {
-      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+  // Try merging with the next instruction.
+  if (!DoMerge && MBBI != MBB.end()) {
+    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+    if (isAM4) {
       if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
           isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
-        MI->getOperand(4).setReg(Base);  // WB to base
-        MI->getOperand(4).setIsDef();
-        if (NextMBBI == I) {
-          Advance = true;
-          ++I;
-        }
-        MBB.erase(NextMBBI);
-        return true;
+        DoMerge = true;
       } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
                  isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
-        MI->getOperand(4).setReg(Base);  // WB to base
-        MI->getOperand(4).setIsDef();
-        if (NextMBBI == I) {
-          Advance = true;
-          ++I;
-        }
-        MBB.erase(NextMBBI);
-        return true;
+        DoMerge = true;
       }
-    }
-  } else {
-    // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
-    if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
-      return false;
-
-    ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
-    unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
-    if (MBBI != MBB.begin()) {
-      MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    } else {
       if (Mode == ARM_AM::ia &&
-          isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset));
-        MI->getOperand(4).setReg(Base);  // WB to base
-        MI->getOperand(4).setIsDef();
-        MBB.erase(PrevMBBI);
-        return true;
+          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+        DoMerge = true;
       }
     }
-
-    if (MBBI != MBB.end()) {
-      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
-      if (Mode == ARM_AM::ia &&
-          isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
-        MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
-        MI->getOperand(4).setReg(Base);  // WB to base
-        MI->getOperand(4).setIsDef();
-        if (NextMBBI == I) {
-          Advance = true;
-          ++I;
-        }
-        MBB.erase(NextMBBI);
+    if (DoMerge) {
+      if (NextMBBI == I) {
+        Advance = true;
+        ++I;
       }
-      return true;
+      MBB.erase(NextMBBI);
     }
   }
 
-  return false;
+  if (!DoMerge)
+    return false;
+
+  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+    .addReg(Base, getDefRegState(true)) // WB base register
+    .addReg(Base, getKillRegState(BaseKill));
+  if (isAM4) {
+    // [t2]LDM_UPD, [t2]STM_UPD
+    MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true))
+      .addImm(Pred).addReg(PredReg);
+  } else {
+    // VLDM[SD]_UPD, VSTM[SD]_UPD
+    MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset))
+      .addImm(Pred).addReg(PredReg);
+  }
+  // Transfer the rest of operands.
+  for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
+    MIB.addOperand(MI->getOperand(OpNum));
+  // Transfer memoperands.
+  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+  MBB.erase(MBBI);
+  return true;
 }
 
 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
   switch (Opc) {
   case ARM::LDR: return ARM::LDR_PRE;
   case ARM::STR: return ARM::STR_PRE;
-  case ARM::VLDRS: return ARM::VLDMS;
-  case ARM::VLDRD: return ARM::VLDMD;
-  case ARM::VSTRS: return ARM::VSTMS;
-  case ARM::VSTRD: return ARM::VSTMD;
+  case ARM::VLDRS: return ARM::VLDMS_UPD;
+  case ARM::VLDRD: return ARM::VLDMD_UPD;
+  case ARM::VSTRS: return ARM::VSTMS_UPD;
+  case ARM::VSTRD: return ARM::VSTMD_UPD;
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     return ARM::t2LDR_PRE;
@@ -596,10 +613,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
   switch (Opc) {
   case ARM::LDR: return ARM::LDR_POST;
   case ARM::STR: return ARM::STR_POST;
-  case ARM::VLDRS: return ARM::VLDMS;
-  case ARM::VLDRD: return ARM::VLDMD;
-  case ARM::VSTRS: return ARM::VSTMS;
-  case ARM::VSTRD: return ARM::VSTMD;
+  case ARM::VLDRS: return ARM::VLDMS_UPD;
+  case ARM::VLDRD: return ARM::VLDMD_UPD;
+  case ARM::VSTRS: return ARM::VSTMS_UPD;
+  case ARM::VSTRD: return ARM::VSTMD_UPD;
   case ARM::t2LDRi8:
   case ARM::t2LDRi12:
     return ARM::t2LDR_POST;
@@ -624,14 +641,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   unsigned Bytes = getLSMultipleTransferSize(MI);
   int Opcode = MI->getOpcode();
   DebugLoc dl = MI->getDebugLoc();
-  bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
-    Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
-  bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
+  bool isAM2 = (Opcode == ARM::LDR || Opcode == ARM::STR);
   if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
     return false;
-  else if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
+  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
     return false;
-  else if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
+  if (isT2i32Load(Opcode) || isT2i32Store(Opcode))
     if (MI->getOperand(2).getImm() != 0)
       return false;
 
@@ -648,33 +665,35 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   unsigned NewOpc = 0;
   // AM2 - 12 bits, thumb2 - 8 bits.
   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
+
+  // Try merging with the previous instruction.
   if (MBBI != MBB.begin()) {
     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
       DoMerge = true;
       AddSub = ARM_AM::sub;
-      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
     } else if (!isAM5 &&
                isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
       DoMerge = true;
-      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
     }
-    if (DoMerge)
+    if (DoMerge) {
+      NewOpc = getPreIndexedLoadStoreOpcode(Opcode);
       MBB.erase(PrevMBBI);
+    }
   }
 
+  // Try merging with the next instruction.
   if (!DoMerge && MBBI != MBB.end()) {
     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
     if (!isAM5 &&
         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
       DoMerge = true;
       AddSub = ARM_AM::sub;
-      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
     } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
       DoMerge = true;
-      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
     }
     if (DoMerge) {
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode);
       if (NextMBBI == I) {
         Advance = true;
         ++I;
@@ -689,22 +708,25 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
   unsigned Offset = 0;
   if (isAM5)
-    Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
-                               ? ARM_AM::db
-                               : ARM_AM::ia, true, (isDPR ? 2 : 1));
+    Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
+                               true, (isDPR ? 2 : 1));
   else if (isAM2)
     Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
   else
     Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
-  if (isLd) {
-    if (isAM5)
-      // VLDMS, VLDMD
-      BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
-        .addReg(Base, getKillRegState(BaseKill))
-        .addImm(Offset).addImm(Pred).addReg(PredReg)
-        .addReg(Base, getDefRegState(true)) // WB base register
-        .addReg(MI->getOperand(0).getReg(), RegState::Define);
-    else if (isAM2)
+
+  if (isAM5) {
+    // VLDM[SD]_UPD, VSTM[SD]_UPD
+    MachineOperand &MO = MI->getOperand(0);
+    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
+      .addReg(Base, getDefRegState(true)) // WB base register
+      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
+      .addImm(Offset)
+      .addImm(Pred).addReg(PredReg)
+      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
+                            getKillRegState(MO.isKill())));
+  } else if (isLd) {
+    if (isAM2)
       // LDR_PRE, LDR_POST,
       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
         .addReg(Base, RegState::Define)
@@ -716,13 +738,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
   } else {
     MachineOperand &MO = MI->getOperand(0);
-    if (isAM5)
-      // VSTMS, VSTMD
-      BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
-        .addImm(Pred).addReg(PredReg)
-        .addReg(Base, getDefRegState(true)) // WB base register
-        .addReg(MO.getReg(), getKillRegState(MO.isKill()));
-    else if (isAM2)
+    if (isAM2)
       // STR_PRE, STR_POST
       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
@@ -910,7 +926,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
           .addReg(BaseReg, getKillRegState(BaseKill))
           .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
           .addImm(Pred).addReg(PredReg)
-          .addReg(0)
           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
         ++NumLDRD2LDM;
@@ -919,7 +934,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
           .addReg(BaseReg, getKillRegState(BaseKill))
           .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
           .addImm(Pred).addReg(PredReg)
-          .addReg(0)
           .addReg(EvenReg,
                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
           .addReg(OddReg,
@@ -1157,7 +1171,8 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   if (MBBI != MBB.begin() &&
       (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) {
     MachineInstr *PrevMI = prior(MBBI);
-    if (PrevMI->getOpcode() == ARM::LDM || PrevMI->getOpcode() == ARM::t2LDM) {
+    if (PrevMI->getOpcode() == ARM::LDM_UPD ||
+        PrevMI->getOpcode() == ARM::t2LDM_UPD) {
       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
       if (MO.getReg() != ARM::LR)
         return false;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 622034b..2dad7f1 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -41,6 +41,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , PostRAScheduler(false)
   , IsR9Reserved(ReserveR9)
   , UseMovt(UseMOVT)
+  , HasFP16(false)
   , stackAlignment(4)
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 6980851..2dc81a4 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -66,6 +66,10 @@ protected:
   /// imms (including global addresses).
   bool UseMovt;
 
+  /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF
+  /// only so far)
+  bool HasFP16;
+
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -94,9 +98,9 @@ protected:
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
   /// that still makes it profitable to inline the call.
   unsigned getMaxInlineSizeThreshold() const {
-    // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
-    // Change this once Thumb ldmia / stmia support is added.
-    return isThumb() ? 0 : 64;
+    // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
+    // Change this once Thumb1 ldmia / stmia support is added.
+    return isThumb1Only() ? 0 : 64;
   }
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
@@ -116,6 +120,8 @@ protected:
   bool useNEONForSinglePrecisionFP() const {
     return hasNEON() && UseNEONForSinglePrecisionFP; }
 
+  bool hasFP16() const { return HasFP16; }
+
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }
 
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 7463e30..680d032 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -10,7 +10,6 @@
 #include "ARMTargetObjectFile.h"
 #include "ARMSubtarget.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Target/TargetMachine.h"
 using namespace llvm;
@@ -35,20 +34,3 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
                     SectionKind::getDataRel());
   }
 }
-
-//===----------------------------------------------------------------------===//
-//                              Mach-O Target
-//===----------------------------------------------------------------------===//
-
-void ARMMachOTargetObjectFile::Initialize(MCContext &Ctx,
-                                          const TargetMachine &TM) {
-  TargetLoweringObjectFileMachO::Initialize(Ctx, TM);
-
-  // Exception Handling.
-  LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
-                                SectionKind::getReadOnlyWithRel());
-}
-
-unsigned ARMMachOTargetObjectFile::getTTypeEncoding() const {
-  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 481d7ab..097fc2c 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -24,18 +24,6 @@ public:
   virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
 };
 
-// FIXME: This subclass isn't 100% necessary. It will become obsolete once we
-//        can place all LSDAs into the TEXT section. See
-//        <rdar://problem/6804645>.
-class ARMMachOTargetObjectFile : public TargetLoweringObjectFileMachO {
-public:
-  ARMMachOTargetObjectFile() : TargetLoweringObjectFileMachO() {}
-
-  virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
-
-  virtual unsigned getTTypeEncoding() const;
-};
-
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/ARM/AsmParser/Makefile b/lib/Target/ARM/AsmParser/Makefile
index 97e5612..841516f 100644
--- a/lib/Target/ARM/AsmParser/Makefile
+++ b/lib/Target/ARM/AsmParser/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMARMAsmParser
 
 # Hack: we need to include 'main' ARM target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 314114c..4db14a3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -38,6 +38,7 @@
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
@@ -74,9 +75,8 @@ namespace {
 
   public:
     explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                           MCContext &Ctx, MCStreamer &Streamer,
-                           const MCAsmInfo *T)
-      : AsmPrinter(O, TM, Ctx, Streamer, T), AFI(NULL), MCP(NULL) {
+                           MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer), AFI(NULL), MCP(NULL) {
       Subtarget = &TM.getSubtarget<ARMSubtarget>();
     }
 
@@ -120,8 +120,12 @@ namespace {
     void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum);
     void printT2AddrModeImm8s4Operand(const MachineInstr *MI, int OpNum);
     void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum);
+    void printT2AddrModeImm8s4OffsetOperand(const MachineInstr *MI, int OpNum) {}
     void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum);
 
+    void printCPSOptionOperand(const MachineInstr *MI, int OpNum) {}
+    void printMSRMaskOperand(const MachineInstr *MI, int OpNum) {}
+    void printNegZeroOperand(const MachineInstr *MI, int OpNum) {}
     void printPredicateOperand(const MachineInstr *MI, int OpNum);
     void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum);
     void printSBitModifierOperand(const MachineInstr *MI, int OpNum);
@@ -194,7 +198,7 @@ namespace {
         bool isIndirect = Subtarget->isTargetDarwin() &&
           Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
         if (!isIndirect)
-          O << *GetGlobalValueSymbol(GV);
+          O << *Mang->getSymbol(GV);
         else {
           // FIXME: Remove this when Darwin transition to @GOT like syntax.
           MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -202,11 +206,12 @@ namespace {
           
           MachineModuleInfoMachO &MMIMachO =
             MMI->getObjFileInfo<MachineModuleInfoMachO>();
-          MCSymbol *&StubSym =
+          MachineModuleInfoImpl::StubValueTy &StubSym =
             GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) :
                                         MMIMachO.getGVStubEntry(Sym);
-          if (StubSym == 0)
-            StubSym = GetGlobalValueSymbol(GV);
+          if (StubSym.getPointer() == 0)
+            StubSym = MachineModuleInfoImpl::
+              StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
         }
       } else {
         assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
@@ -299,7 +304,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     break;
   }
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress: {
     bool isCallOp = Modifier && !strcmp(Modifier, "call");
@@ -311,7 +316,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
              (TF & ARMII::MO_HI16))
       O << ":upper16:";
-    O << *GetGlobalValueSymbol(GV);
+    O << *Mang->getSymbol(GV);
 
     printOffset(MO.getOffset());
 
@@ -516,8 +521,10 @@ void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op,
     if (MO1.getReg() == ARM::SP) {
       // FIXME
       bool isLDM = (MI->getOpcode() == ARM::LDM ||
+                    MI->getOpcode() == ARM::LDM_UPD ||
                     MI->getOpcode() == ARM::LDM_RET ||
                     MI->getOpcode() == ARM::t2LDM ||
+                    MI->getOpcode() == ARM::t2LDM_UPD ||
                     MI->getOpcode() == ARM::t2LDM_RET);
       O << ARM_AM::getAMSubModeAltStr(Mode, isLDM);
     } else
@@ -810,11 +817,10 @@ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
 
 void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
   O << "{";
-  // Always skip the first operand, it's the optional (and implicit writeback).
-  for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) {
+  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
     if (MI->getOperand(i).isImplicit())
       continue;
-    if ((int)i != OpNum+1) O << ", ";
+    if ((int)i != OpNum) O << ", ";
     printOperand(MI, i);
   }
   O << "}";
@@ -884,16 +890,16 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
     if (UseSet && isNew) {
       O << "\t.set\t"
         << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB) << ','
-        << *MBB->getSymbol(OutContext) << '-' << *JTISymbol << '\n';
+        << *MBB->getSymbol() << '-' << *JTISymbol << '\n';
     }
 
     O << JTEntryDirective << ' ';
     if (UseSet)
       O << *GetARMSetPICJumpTableLabel2(JTI, MO2.getImm(), MBB);
     else if (TM.getRelocationModel() == Reloc::PIC_)
-      O << *MBB->getSymbol(OutContext) << '-' << *JTISymbol;
+      O << *MBB->getSymbol() << '-' << *JTISymbol;
     else
-      O << *MBB->getSymbol(OutContext);
+      O << *MBB->getSymbol();
 
     if (i != e-1)
       O << '\n';
@@ -925,9 +931,9 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum) {
       O << MAI->getData16bitsDirective();
     
     if (ByteOffset || HalfWordOffset)
-      O << '(' << *MBB->getSymbol(OutContext) << "-" << *JTISymbol << ")/2";
+      O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
     else
-      O << "\tb.w " << *MBB->getSymbol(OutContext);
+      O << "\tb.w " << *MBB->getSymbol();
 
     if (i != e-1)
       O << '\n';
@@ -1123,7 +1129,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
 
     // Output non-lazy-pointers for external and common global variables.
     MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
-    
+
     if (!Stubs.empty()) {
       // Switch with ".non_lazy_symbol_pointer" directive.
       OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
@@ -1132,15 +1138,16 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
         // L_foo$stub:
         OutStreamer.EmitLabel(Stubs[i].first);
         //   .indirect_symbol _foo
-        MCSymbol *MCSym = Stubs[i].second;
-        OutStreamer.EmitSymbolAttribute(MCSym, MCSA_IndirectSymbol);
+        MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+        OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
 
-        if (MCSym->isUndefined())
+        if (MCSym.getInt())
           // External to current translation unit.
           OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
         else
           // Internal to current translation unit.
-          OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym, OutContext),
+          OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+                                                        OutContext),
                                 4/*size*/, 0/*addrspace*/);
       }
 
@@ -1156,8 +1163,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
         // L_foo$stub:
         OutStreamer.EmitLabel(Stubs[i].first);
         //   .long _foo
-        OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second,
-                                                      OutContext),
+        OutStreamer.EmitValue(MCSymbolRefExpr::
+                              Create(Stubs[i].second.getPointer(),
+                                     OutContext),
                               4/*size*/, 0/*addrspace*/);
       }
 
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index b7964c9..9a3cbc3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -50,7 +50,7 @@ public:
   void printAddrMode6Operand(const MCInst *MI, unsigned OpNum);
   void printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
                               const char *Modifier = 0);
-    
+
   void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum);
 
   void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum);
@@ -68,8 +68,12 @@ public:
   void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {}
   void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {}
   void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {}
+  void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {}
   void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {}
   
+  void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {}
+  void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {}
+  void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {}
   void printPredicateOperand(const MCInst *MI, unsigned OpNum);
   void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum);
   void printSBitModifierOperand(const MCInst *MI, unsigned OpNum);
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 5f8705e..7cb305f 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 //#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallString.h"
 using namespace llvm;
@@ -45,7 +46,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
   case 0: break;
   }
   
-  return Printer.GetGlobalValueSymbol(MO.getGlobal());
+  return Printer.Mang->getSymbol(MO.getGlobal());
 }
 
 MCSymbol *ARMMCInstLower::
@@ -135,7 +136,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       break;
     case MachineOperand::MO_MachineBasicBlock:
       MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                       MO.getMBB()->getSymbol(Ctx), Ctx));
+                       MO.getMBB()->getSymbol(), Ctx));
       break;
     case MachineOperand::MO_GlobalAddress:
       MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile
index 208becc..65d372e 100644
--- a/lib/Target/ARM/AsmPrinter/Makefile
+++ b/lib/Target/ARM/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMARMAsmPrinter
 
 # Hack: we need to include 'main' arm target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7f42c82..29ae631 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -159,7 +159,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
   AddDefaultPred(MIB);
-  MIB.addReg(0); // No write back.
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
     // Add the callee-saved register as live-in. It's killed at the spill.
@@ -182,7 +181,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   DebugLoc DL = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::tPOP));
   AddDefaultPred(MIB);
-  MIB.addReg(0); // No write back.
 
   bool NumRegs = false;
   for (unsigned i = CSI.size(); i != 0; --i) {
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6215d2f..99c38b1 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -791,9 +791,9 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
       isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
     return true;
   else if (MI->getOpcode() == ARM::tPOP) {
-    // The first three operands are predicates and such. The last two are
+    // The first two operands are predicates. The last two are
     // imp-def and imp-use of SP. Check everything in between.
-    for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i)
+    for (int i = 2, e = MI->getNumOperands() - 2; i != e; ++i)
       if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs))
         return false;
     return true;
@@ -854,12 +854,16 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
   }
 
   if (VARegSaveSize) {
+    // Unlike T2 and ARM mode, the T1 pop instruction cannot restore
+    // to LR, and we can't pop the value directly to the PC since
+    // we need to update the SP after popping the value. Therefore, we
+    // pop the old LR into R3 as a temporary.
+
     // Move back past the callee-saved register restoration
     while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs))
       ++MBBI;
     // Epilogue for vararg functions: pop LR to R3 and branch off it.
     AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
-      .addReg(0) // No write back.
       .addReg(ARM::R3, RegState::Define);
 
     emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 5086eff..2bc75f2 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -121,9 +121,11 @@ namespace {
     { ARM::t2STRHi12,ARM::tSTRH,  0,             5,   0,    1,   0,  0,0, 1 },
     { ARM::t2STRHs, ARM::tSTRH,   0,             0,   0,    1,   0,  0,0, 1 },
 
+    { ARM::t2LDM,   ARM::tLDM,    0,             0,   0,    1,   1,  1,1, 1 },
     { ARM::t2LDM_RET,0,           ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDM,   ARM::tLDM,    ARM::tPOP,     0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2STM,   ARM::tSTM,    ARM::tPUSH,    0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2LDM_UPD,ARM::tLDM_UPD,ARM::tPOP,    0,   0,    1,   1,  1,1, 1 },
+    // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
+    { ARM::t2STM_UPD,ARM::tSTM_UPD,ARM::tPUSH,   0,   0,    1,   1,  1,1, 1 },
   };
 
   class Thumb2SizeReduce : public MachineFunctionPass {
@@ -231,8 +233,9 @@ Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
 
 static bool VerifyLowRegs(MachineInstr *MI) {
   unsigned Opc = MI->getOpcode();
-  bool isPCOk = (Opc == ARM::t2LDM_RET) || (Opc == ARM::t2LDM);
-  bool isLROk = (Opc == ARM::t2STM);
+  bool isPCOk = (Opc == ARM::t2LDM_RET || Opc == ARM::t2LDM ||
+                 Opc == ARM::t2LDM_UPD);
+  bool isLROk = (Opc == ARM::t2STM_UPD);
   bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
@@ -307,19 +310,35 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     HasShift = true;
     OpNum = 4;
     break;
-  case ARM::t2LDM_RET:
-  case ARM::t2LDM:
-  case ARM::t2STM: {
-    OpNum = 0;
+  case ARM::t2LDM: {
     unsigned BaseReg = MI->getOperand(0).getReg();
-    unsigned Mode = MI->getOperand(1).getImm();
-    if (BaseReg == ARM::SP && ARM_AM::getAM4WBFlag(Mode)) {
-      Opc = Entry.NarrowOpc2;
-      OpNum = 2;
-    } else if (Entry.WideOpc == ARM::t2LDM_RET ||
-               !isARMLowRegister(BaseReg) ||
-               !ARM_AM::getAM4WBFlag(Mode) ||
-               ARM_AM::getAM4SubMode(Mode) != ARM_AM::ia) {
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
+    if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
+      return false;
+    OpNum = 0;
+    isLdStMul = true;
+    break;
+  }
+  case ARM::t2LDM_RET: {
+    unsigned BaseReg = MI->getOperand(1).getReg();
+    if (BaseReg != ARM::SP)
+      return false;
+    Opc = Entry.NarrowOpc2; // tPOP_RET
+    OpNum = 3;
+    isLdStMul = true;
+    break;
+  }
+  case ARM::t2LDM_UPD:
+  case ARM::t2STM_UPD: {
+    OpNum = 0;
+    unsigned BaseReg = MI->getOperand(1).getReg();
+    ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(2).getImm());
+    if (BaseReg == ARM::SP &&
+        ((Entry.WideOpc == ARM::t2LDM_UPD && Mode == ARM_AM::ia) ||
+         (Entry.WideOpc == ARM::t2STM_UPD && Mode == ARM_AM::db))) {
+      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
+      OpNum = 3;
+    } else if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia) {
       return false;
     }
     isLdStMul = true;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 95de3d8..91e58ce 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -1059,8 +1059,8 @@ def : Pat<(i64 immSExt16:$imm),
 def : Pat<(i64 immSExt16int:$imm),
           (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
 def : Pat<(i64 immConst2PartInt:$imm),
-          (ZAPNOTi (LDA (LL16 (SExt32 immConst2PartInt:$imm)), 
-                        (LDAH (LH16 (SExt32 immConst2PartInt:$imm)), R31)), 15)>;
+          (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
+                        (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
 
 
 //TODO: I want to just define these like this!
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 733a46c..093cf05 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -37,9 +38,8 @@ namespace {
     ///
 
     explicit AlphaAsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
-                             MCContext &Ctx, MCStreamer &Streamer,
-                             const MCAsmInfo *T)
-      : AsmPrinter(o, tm, Ctx, Streamer, T) {}
+                             MCStreamer &Streamer)
+      : AsmPrinter(o, tm, Streamer) {}
 
     virtual const char *getPassName() const {
       return "Alpha Assembly Printer";
@@ -96,7 +96,7 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
     return;
 
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
 
   case MachineOperand::MO_ConstantPoolIndex:
@@ -109,7 +109,7 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
     return;
 
   case MachineOperand::MO_GlobalAddress:
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     return;
 
   case MachineOperand::MO_JumpTableIndex:
diff --git a/lib/Target/Alpha/AsmPrinter/Makefile b/lib/Target/Alpha/AsmPrinter/Makefile
index 3c64a3c..ea13c38 100644
--- a/lib/Target/Alpha/AsmPrinter/Makefile
+++ b/lib/Target/Alpha/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMAlphaAsmPrinter
 
 # Hack: we need to include 'main' alpha target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
index fe13e14..1c6d841 100644
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -39,9 +40,8 @@ namespace {
   class BlackfinAsmPrinter : public AsmPrinter {
   public:
     BlackfinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                       MCContext &Ctx, MCStreamer &Streamer,
-                       const MCAsmInfo *MAI)
-      : AsmPrinter(O, TM, Ctx, Streamer, MAI) {}
+                       MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "Blackfin Assembly Printer";
@@ -82,10 +82,10 @@ void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     O << MO.getImm();
     break;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress:
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     printOffset(MO.getOffset());
     break;
   case MachineOperand::MO_ExternalSymbol:
diff --git a/lib/Target/Blackfin/AsmPrinter/Makefile b/lib/Target/Blackfin/AsmPrinter/Makefile
index 091d4df..a106a23 100644
--- a/lib/Target/Blackfin/AsmPrinter/Makefile
+++ b/lib/Target/Blackfin/AsmPrinter/Makefile
@@ -11,6 +11,6 @@ LIBRARYNAME = LLVMBlackfinAsmPrinter
 
 # Hack: we need to include 'main' Blackfin target directory to grab private
 # headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 10f873f..b1ba0d2 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Target/Mangler.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -95,6 +96,7 @@ namespace {
     LoopInfo *LI;
     const Module *TheModule;
     const MCAsmInfo* TAsm;
+    MCContext *TCtx;
     const TargetData* TD;
     std::map<const Type *, std::string> TypeNames;
     std::map<const ConstantFP *, unsigned> FPConstantMap;
@@ -1731,7 +1733,8 @@ bool CWriter::doInitialization(Module &M) {
     TAsm = Match->createAsmInfo(Triple);
 #endif    
   TAsm = new CBEMCAsmInfo();
-  Mang = new Mangler(*TAsm);
+  TCtx = new MCContext(*TAsm);
+  Mang = new Mangler(*TCtx, *TD);
 
   // Keep track of which functions are static ctors/dtors so they can have
   // an attribute added to their prototypes.
diff --git a/lib/Target/CellSPU/AsmPrinter/Makefile b/lib/Target/CellSPU/AsmPrinter/Makefile
index 69639ef..4ec9d04 100644
--- a/lib/Target/CellSPU/AsmPrinter/Makefile
+++ b/lib/Target/CellSPU/AsmPrinter/Makefile
@@ -12,6 +12,6 @@ LIBRARYNAME = LLVMCellSPUAsmPrinter
 
 # Hack: we need to include 'main' CellSPU target directory to grab
 # private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 2ca05c2..857ddcf 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -38,9 +39,8 @@ namespace {
   class SPUAsmPrinter : public AsmPrinter {
   public:
     explicit SPUAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                           MCContext &Ctx, MCStreamer &Streamer,
-                           const MCAsmInfo *T) :
-      AsmPrinter(O, TM, Ctx, Streamer, T) {}
+                           MCStreamer &Streamer) :
+      AsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "STI CBEA SPU Assembly Printer";
@@ -281,7 +281,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) {
     return;
 
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_JumpTableIndex:
     O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
@@ -311,7 +311,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) {
         return;
       }
     }
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     return;
   default:
     O << "<unknown operand type: " << MO.getType() << ">";
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
index 1fe7aff..9468aee 100644
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ b/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -205,10 +205,9 @@ def CellSDKnand:
 // Shift/rotate intrinsics:
 //===----------------------------------------------------------------------===//
 
-/* FIXME: These have (currently unenforced) type conflicts. */
 def CellSDKshli:
   Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
-      (SHLIv4i32 VECREG:$rA, uimm7:$val)>;
+      (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
 
 def CellSDKshlqbi:
   Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
@@ -216,7 +215,7 @@ def CellSDKshlqbi:
 
 def CellSDKshlqii:
   Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
-      (SHLQBIIv16i8 VECREG:$rA, uimm7:$val)>;
+      (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
 
 def CellSDKshlqby:
   Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
@@ -224,7 +223,8 @@ def CellSDKshlqby:
 
 def CellSDKshlqbyi:
   Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
-      (SHLQBYIv16i8 VECREG:$rA, uimm7:$val)>;
+      (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
           
 //===----------------------------------------------------------------------===//
 // Branch/compare intrinsics:
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 47cb579..069a182 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -155,8 +155,9 @@ multiclass CompareLogicalGreaterEqual64 {
 defm I64LGE: CompareLogicalGreaterEqual64;
 
 def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
-def : Pat<(setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-                  I64LGEv2i64.Fragment>;
+def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+          I64LGEv2i64.Fragment>;
+                  
 
 // i64 setult:
 def : I64SETCCNegCond<setult, I64LGEr64>;
@@ -233,8 +234,8 @@ multiclass CompareGreaterEqual64 {
 defm I64GE: CompareGreaterEqual64;
 
 def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
-def : Pat<(setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
-                  I64GEv2i64.Fragment>;
+def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+          I64GEv2i64.Fragment>;
 
 // i64 setult:
 def : I64SETCCNegCond<setlt, I64GEr64>;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index b96b64e..ad12604 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -2179,10 +2179,10 @@ multiclass ShiftLeftHalfwordImm
 defm SHLHI : ShiftLeftHalfwordImm;
 
 def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
-          (SHLHIv8i16 VECREG:$rA, uimm7:$val)>;
+          (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
 
 def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
-          (SHLHIr16 R16C:$rA, uimm7:$val)>;
+          (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
 
 //===----------------------------------------------------------------------===//
 
@@ -2410,8 +2410,8 @@ multiclass RotateLeftHalfwordImm
 
 defm ROTHI: RotateLeftHalfwordImm;
 
-def : Pat<(SPUvec_rotl VECREG:$rA, (i32 uimm7:$val)),
-          (ROTHIv8i16 VECREG:$rA, imm:$val)>;
+def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+          (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
 
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // Rotate word:
@@ -2682,10 +2682,10 @@ def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
           (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
 
 def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
-         (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+         (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
 
 def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
-         (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+         (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
 
 def ROTHMIr16:
     ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
@@ -2695,10 +2695,10 @@ def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
          (ROTHMIr16 R16C:$rA, uimm7:$val)>;
 
 def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
-         (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+         (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
 
 def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
-         (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+         (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
 
 // ROTM v4i32 form: See the ROTHM v8i16 comments.
 class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
@@ -2709,14 +2709,14 @@ def ROTMv4i32:
     ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
              [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_srl VECREG:$rA, R32C:$rB),
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB),
           (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
 
-def : Pat<(SPUvec_srl VECREG:$rA, R16C:$rB),
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB),
           (ROTMv4i32 VECREG:$rA,
                      (SFIr32 (XSHWr16 R16C:$rB), 0))>;
 
-def : Pat<(SPUvec_srl VECREG:$rA, R8C:$rB),
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB),
           (ROTMv4i32 VECREG:$rA,
                      (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
 
@@ -2742,11 +2742,11 @@ def ROTMIv4i32:
       [(set (v4i32 VECREG:$rT),
             (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
 
-def : Pat<(SPUvec_srl VECREG:$rA, (i16 uimm7:$val)),
-          (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
+          (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
 
-def : Pat<(SPUvec_srl VECREG:$rA, (i8 uimm7:$val)),
-          (ROTMIv4i32 VECREG:$rA, uimm7:$val)>;
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
+          (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
 
 // ROTMI r32 form: know how to complement the immediate value.
 def ROTMIr32:
@@ -2755,10 +2755,10 @@ def ROTMIr32:
       [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
 
 def : Pat<(srl R32C:$rA, (i16 imm:$val)),
-          (ROTMIr32 R32C:$rA, uimm7:$val)>;
+          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
 
 def : Pat<(srl R32C:$rA, (i8 imm:$val)),
-          (ROTMIr32 R32C:$rA, uimm7:$val)>;
+          (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
 
 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
 // ROTQMBY: This is a vector form merely so that when used in an
@@ -2916,14 +2916,14 @@ def ROTMAHv8i16:
       "rotmah\t$rT, $rA, $rB", RotateShift,
       [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB),
           (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB),
           (ROTMAHv8i16 VECREG:$rA,
                        (SFIr32 (XSHWr16 R16C:$rB), 0))>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R8C:$rB),
           (ROTMAHv8i16 VECREG:$rA,
                        (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
 
@@ -2950,10 +2950,10 @@ def ROTMAHIv8i16:
             (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
 
 def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
-          (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+          (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
 
 def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
-          (ROTMAHIv8i16 (v8i16 VECREG:$rA), (i32 uimm7:$val))>;
+          (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
 
 def ROTMAHIr16:
     RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
@@ -2961,25 +2961,25 @@ def ROTMAHIr16:
       [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
 
 def : Pat<(sra R16C:$rA, (i32 imm:$val)),
-          (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+          (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
 
 def : Pat<(sra R16C:$rA, (i8 imm:$val)),
-          (ROTMAHIr16 R16C:$rA, uimm7:$val)>;
+          (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
 
 def ROTMAv4i32:
     RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
       "rotma\t$rT, $rA, $rB", RotateShift,
       [/* see patterns below - $rB must be negated */]>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R32C:$rB),
-          (ROTMAv4i32 (v4i32 VECREG:$rA), (SFIr32 R32C:$rB, 0))>;
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB),
+          (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R16C:$rB),
-          (ROTMAv4i32 (v4i32 VECREG:$rA),
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB),
+          (ROTMAv4i32 VECREG:$rA,
                       (SFIr32 (XSHWr16 R16C:$rB), 0))>;
 
-def : Pat<(SPUvec_sra VECREG:$rA, R8C:$rB),
-          (ROTMAv4i32 (v4i32 VECREG:$rA),
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB),
+          (ROTMAv4i32 VECREG:$rA,
                       (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
 
 def ROTMAr32:
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 802628f..6216651 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -9,6 +9,17 @@
 // Cell SPU Instruction Operands:
 //===----------------------------------------------------------------------===//
 
+// TO_IMM32 - Convert an i8/i16 immediate to i32, zero-extending its value.
+def TO_IMM32 : SDNodeXForm<imm, [{
+  return getI32Imm(N->getZExtValue());
+}]>;
+
+// TO_IMM16 - Convert an i8/i32 immediate to an i16 target constant.
+def TO_IMM16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
+}]>;
+
+
 def LO16 : SDNodeXForm<imm, [{
   unsigned val = N->getZExtValue();
   // Transformation function: get the low 16 bits.
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 4ba0cb1..8c78bab 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -436,7 +436,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
 
   // Prepare for debug frame info.
   bool hasDebugInfo = MMI && MMI->hasDebugInfo();
-  unsigned FrameLabelId = 0;
+  MCSymbol *FrameLabel = 0;
 
   // Move MBBI back to the beginning of the function.
   MBBI = MBB.begin();
@@ -452,8 +452,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
     FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
     if (hasDebugInfo) {
       // Mark effective beginning of when frame pointer becomes valid.
-      FrameLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(FrameLabelId);
+      FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel);
     }
 
     // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
@@ -500,7 +500,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
       // Show update of SP.
       MachineLocation SPDst(MachineLocation::VirtualFP);
       MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
-      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
 
       // Add callee saved registers to move list.
       const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -510,16 +510,16 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
         if (Reg == SPU::R0) continue;
         MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
         MachineLocation CSSrc(Reg);
-        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
       }
 
       // Mark effective beginning of when frame pointer is ready.
-      unsigned ReadyLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(ReadyLabelId);
+      MCSymbol *ReadyLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel);
 
       MachineLocation FPDst(SPU::R1);
       MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
     }
   } else {
     // This is a leaf function -- insert a branch hint iff there are
@@ -530,8 +530,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
       dl = MBBI->getDebugLoc();
 
       // Insert terminator label
-      unsigned BranchLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addImm(BranchLabelId);
+      BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL))
+        .addSym(MMI->getContext().CreateTempSymbol());
     }
   }
 }
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index 6fe1026..ee2c7c8 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -31,6 +31,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
@@ -52,9 +53,8 @@ namespace {
     const MBlazeSubtarget *Subtarget;
   public:
     explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                              MCContext &Ctx, MCStreamer &Streamer, 
-                              const MCAsmInfo *T )
-      : AsmPrinter(O, TM, Ctx, Streamer, T) {
+                              MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {
       Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
     }
 
@@ -236,11 +236,11 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
       }
 
     case MachineOperand::MO_MachineBasicBlock:
-      O << *MO.getMBB()->getSymbol(OutContext);
+      O << *MO.getMBB()->getSymbol();
       return;
 
     case MachineOperand::MO_GlobalAddress:
-      O << *GetGlobalValueSymbol(MO.getGlobal());
+      O << *Mang->getSymbol(MO.getGlobal());
       break;
 
     case MachineOperand::MO_ExternalSymbol:
diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile
index c8e4d8f..c44651c 100644
--- a/lib/Target/MBlaze/AsmPrinter/Makefile
+++ b/lib/Target/MBlaze/AsmPrinter/Makefile
@@ -12,6 +12,6 @@ LIBRARYNAME = LLVMMBlazeAsmPrinter
 
 # Hack: we need to include 'main' MBlaze target directory to grab
 # private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
index 7a35eb0..04ac6f1 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -32,6 +32,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -42,9 +43,8 @@ namespace {
   class MSP430AsmPrinter : public AsmPrinter {
   public:
     MSP430AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                     MCContext &Ctx, MCStreamer &Streamer,
-                     const MCAsmInfo *MAI)
-      : AsmPrinter(O, TM, Ctx, Streamer, MAI) {}
+                     MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "MSP430 Assembly Printer";
@@ -92,7 +92,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     O << MO.getImm();
     return;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress: {
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
@@ -109,7 +109,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     if (Offset)
       O << '(' << Offset << '+';
 
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
 
     if (Offset)
       O << ')';
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
index 2e5ef8e..32c6b04 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
@@ -20,6 +20,7 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/SmallString.h"
@@ -32,7 +33,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
   case 0: break;
   }
 
-  return Printer.GetGlobalValueSymbol(MO.getGlobal());
+  return Printer.Mang->getSymbol(MO.getGlobal());
 }
 
 MCSymbol *MSP430MCInstLower::
@@ -116,7 +117,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       break;
     case MachineOperand::MO_MachineBasicBlock:
       MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                         MO.getMBB()->getSymbol(Printer.OutContext), Ctx));
+                         MO.getMBB()->getSymbol(), Ctx));
       break;
     case MachineOperand::MO_GlobalAddress:
       MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
diff --git a/lib/Target/MSP430/AsmPrinter/Makefile b/lib/Target/MSP430/AsmPrinter/Makefile
index 4f340c6..a5293ab 100644
--- a/lib/Target/MSP430/AsmPrinter/Makefile
+++ b/lib/Target/MSP430/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMMSP430AsmPrinter
 
 # Hack: we need to include 'main' MSP430 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index ef6defc..fb93706 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -12,8 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/Mangler.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 using namespace llvm;
@@ -59,11 +63,10 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
 /// appendMangledName - Add the specified string in mangled form if it uses
 /// any unusual characters.
 static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
-                              const MCAsmInfo *MAI) {
+                              const MCAsmInfo &MAI) {
   // The first character is not allowed to be a number unless the target
   // explicitly allows it.
-  if ((MAI == 0 || !MAI->doesAllowNameToStartWithDigit()) &&
-      Str[0] >= '0' && Str[0] <= '9') {
+  if (!MAI.doesAllowNameToStartWithDigit() && Str[0] >= '0' && Str[0] <= '9') {
     MangleLetter(OutName, Str[0]);
     Str = Str.substr(1);
   }
@@ -100,6 +103,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   StringRef Name = GVName.toStringRef(TmpData);
   assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
   
+  const MCAsmInfo &MAI = Context.getAsmInfo();
+  
   // If the global name is not led with \1, add the appropriate prefixes.
   if (Name[0] == '\1') {
     Name = Name.substr(1);
@@ -134,7 +139,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   // On systems that do not allow quoted names, we need to mangle most
   // strange characters.
   if (!MAI.doesAllowQuotesInName())
-    return appendMangledName(OutName, Name, &MAI);
+    return appendMangledName(OutName, Name, MAI);
   
   // Okay, the system allows quoted strings.  We can quote most anything, the
   // only characters that need escaping are " and \n.
@@ -142,6 +147,26 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
   return appendMangledQuotedName(OutName, Name);
 }
 
+/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
+/// a suffix on their name: '@' followed by the total size of their arguments,
+/// in bytes, with each argument's size rounded up to a multiple of 4.
+static void AddFastCallStdCallSuffix(SmallVectorImpl<char> &OutName,
+                                     const Function *F, const TargetData &TD) {
+  // Running total of the argument sizes; despite its name it counts bytes.
+  unsigned ArgWords = 0;
+  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+       AI != AE; ++AI) {
+    const Type *Ty = AI->getType();
+    // For byval arguments, measure the pointee type, not the pointer.
+    if (AI->hasByValAttr())
+      Ty = cast<PointerType>(Ty)->getElementType();
+    // Round each argument's size up to a DWORD (4-byte) boundary.
+    ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+  }
+  
+  raw_svector_ostream(OutName) << '@' << ArgWords;
+}
+
 
 /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
 /// and the specified global variable's name.  If the global variable doesn't
@@ -156,16 +181,43 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
     PrefixTy = Mangler::LinkerPrivate;
   
   // If this global has a name, handle it simply.
-  if (GV->hasName())
-    return getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+  if (GV->hasName()) {
+    getNameWithPrefix(OutName, GV->getName(), PrefixTy);
+  } else {
+    // Get the ID for the global, assigning a new one if we haven't got one
+    // already.
+    unsigned &ID = AnonGlobalIDs[GV];
+    if (ID == 0) ID = NextAnonGlobalID++;
   
-  // Get the ID for the global, assigning a new one if we haven't got one
-  // already.
-  unsigned &ID = AnonGlobalIDs[GV];
-  if (ID == 0) ID = NextAnonGlobalID++;
+    // Must mangle the global into a unique ID.
+    getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+  }
   
-  // Must mangle the global into a unique ID.
-  getNameWithPrefix(OutName, "__unnamed_" + Twine(ID), PrefixTy);
+  // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
+  // add it.
+  if (Context.getAsmInfo().hasMicrosoftFastStdCallMangling()) {
+    if (const Function *F = dyn_cast<Function>(GV)) {
+      CallingConv::ID CC = F->getCallingConv();
+    
+      // fastcall functions need to start with @.
+      // FIXME: This logic seems unlikely to be right.
+      if (CC == CallingConv::X86_FastCall) {
+        if (OutName[0] == '_')
+          OutName[0] = '@';
+        else
+          OutName.insert(OutName.begin(), '@');
+      }
+    
+      // fastcall and stdcall functions usually need @42 at the end to specify
+      // the argument info.
+      const FunctionType *FT = F->getFunctionType();
+      if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+          // "Pure" variadic functions do not receive @0 suffix.
+          (!FT->isVarArg() || FT->getNumParams() == 0 ||
+           (FT->getNumParams() == 1 && F->hasStructRetAttr())))
+        AddFastCallStdCallSuffix(OutName, F, TD);
+    }
+  }
 }
 
 /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
@@ -177,3 +229,16 @@ std::string Mangler::getNameWithPrefix(const GlobalValue *GV,
   getNameWithPrefix(Buf, GV, isImplicitlyPrivate);
   return std::string(Buf.begin(), Buf.end());
 }
+
+/// getSymbol - Return the MCSymbol that labels the address of the specified
+/// global value.  Globals with private linkage get a temporary symbol.
+MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
+  SmallString<60> NameStr;
+  getNameWithPrefix(NameStr, GV, false);
+  if (!GV->hasPrivateLinkage())
+    return Context.GetOrCreateSymbol(NameStr.str());
+  
+  return Context.GetOrCreateTemporarySymbol(NameStr.str());
+}
+
+
diff --git a/lib/Target/Mips/AsmPrinter/Makefile b/lib/Target/Mips/AsmPrinter/Makefile
index a2fecf4..b1efe9b 100644
--- a/lib/Target/Mips/AsmPrinter/Makefile
+++ b/lib/Target/Mips/AsmPrinter/Makefile
@@ -12,6 +12,6 @@ LIBRARYNAME = LLVMMipsAsmPrinter
 
 # Hack: we need to include 'main' Mips target directory to grab
 # private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index b8641c3..2a2d9dc 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -31,6 +31,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h" 
 #include "llvm/Target/TargetMachine.h"
@@ -50,9 +51,8 @@ namespace {
     const MipsSubtarget *Subtarget;
   public:
     explicit MipsAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, 
-                            MCContext &Ctx, MCStreamer &Streamer,
-                            const MCAsmInfo *T)
-      : AsmPrinter(O, TM, Ctx, Streamer, T) {
+                            MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {
       Subtarget = &TM.getSubtarget<MipsSubtarget>();
     }
 
@@ -277,11 +277,11 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
       break;
 
     case MachineOperand::MO_MachineBasicBlock:
-      O << *MO.getMBB()->getSymbol(OutContext);
+      O << *MO.getMBB()->getSymbol();
       return;
 
     case MachineOperand::MO_GlobalAddress:
-      O << *GetGlobalValueSymbol(MO.getGlobal());
+      O << *Mang->getSymbol(MO.getGlobal());
       break;
 
     case MachineOperand::MO_ExternalSymbol:
diff --git a/lib/Target/PIC16/AsmPrinter/Makefile b/lib/Target/PIC16/AsmPrinter/Makefile
index f4db57e..e3c0684 100644
--- a/lib/Target/PIC16/AsmPrinter/Makefile
+++ b/lib/Target/PIC16/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMPIC16AsmPrinter
 
 # Hack: we need to include 'main' pic16 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index 44a6cc0..b6eceb3 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -35,11 +36,10 @@ using namespace llvm;
 #include "PIC16GenAsmWriter.inc"
 
 PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                                 MCContext &Ctx, MCStreamer &Streamer,
-                                 const MCAsmInfo *T)
-: AsmPrinter(O, TM, Ctx, Streamer, T), DbgInfo(O, T) {
+                                 MCStreamer &Streamer)
+: AsmPrinter(O, TM, Streamer), DbgInfo(O, TM.getMCAsmInfo()) {
   PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering());
-  PMAI = static_cast<const PIC16MCAsmInfo*>(T);
+  PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo());
   PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering();
 }
 
@@ -179,7 +179,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
       return;
 
     case MachineOperand::MO_GlobalAddress: {
-      MCSymbol *Sym = GetGlobalValueSymbol(MO.getGlobal());
+      MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
       // FIXME: currently we do not have a memcpy def coming in the module
       // by any chance, as we do not link in those as .bc lib. So these calls
       // are always external and it is safe to emit an extern.
@@ -206,7 +206,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
       break;
     }
     case MachineOperand::MO_MachineBasicBlock:
-      O << *MO.getMBB()->getSymbol(OutContext);
+      O << *MO.getMBB()->getSymbol();
       return;
 
     default:
@@ -312,7 +312,7 @@ void PIC16AsmPrinter::EmitFunctionDecls(Module &M) {
     if (!I->isDeclaration() && !I->hasExternalLinkage())
       continue;
 
-    MCSymbol *Sym = GetGlobalValueSymbol(I);
+    MCSymbol *Sym = Mang->getSymbol(I);
     
     // Do not emit memcpy, memset, and memmove here.
     // Calls to these routines can be generated in two ways,
@@ -342,7 +342,7 @@ void PIC16AsmPrinter::EmitUndefinedVars(Module &M) {
 
   O << "\n" << MAI->getCommentString() << "Imported Variables - BEGIN" << "\n";
   for (unsigned j = 0; j < Items.size(); j++)
-    O << MAI->getExternDirective() << *GetGlobalValueSymbol(Items[j]) << "\n";
+    O << MAI->getExternDirective() << *Mang->getSymbol(Items[j]) << "\n";
   O << MAI->getCommentString() << "Imported Variables - END" << "\n";
 }
 
@@ -353,7 +353,7 @@ void PIC16AsmPrinter::EmitDefinedVars(Module &M) {
 
   O << "\n" << MAI->getCommentString() << "Exported Variables - BEGIN" << "\n";
   for (unsigned j = 0; j < Items.size(); j++)
-    O << MAI->getGlobalDirective() << *GetGlobalValueSymbol(Items[j]) << "\n";
+    O << MAI->getGlobalDirective() << *Mang->getSymbol(Items[j]) << "\n";
   O <<  MAI->getCommentString() << "Exported Variables - END" << "\n";
 }
 
@@ -432,7 +432,7 @@ void PIC16AsmPrinter::EmitInitializedDataSection(const PIC16Section *S) {
     for (unsigned j = 0; j < Items.size(); j++) {
       Constant *C = Items[j]->getInitializer();
       int AddrSpace = Items[j]->getType()->getAddressSpace();
-      O << *GetGlobalValueSymbol(Items[j]);
+      O << *Mang->getSymbol(Items[j]);
       EmitGlobalConstant(C, AddrSpace);
    }
 }
@@ -451,7 +451,7 @@ EmitUninitializedDataSection(const PIC16Section *S) {
       Constant *C = Items[j]->getInitializer();
       const Type *Ty = C->getType();
       unsigned Size = TD->getTypeAllocSize(Ty);
-      O << *GetGlobalValueSymbol(Items[j]) << " RES " << Size << "\n";
+      O << *Mang->getSymbol(Items[j]) << " RES " << Size << "\n";
     }
 }
 
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index 77b6e63..519be4c 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -31,8 +31,7 @@ namespace llvm {
   class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
   public:
     explicit PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                             MCContext &Ctx, MCStreamer &Streamer,
-                             const MCAsmInfo *T);
+                             MCStreamer &Streamer);
   private:
     virtual const char *getPassName() const {
       return "PIC16 Assembly Printer";
diff --git a/lib/Target/PowerPC/AsmPrinter/Makefile b/lib/Target/PowerPC/AsmPrinter/Makefile
index 269ef92..bd5dce1 100644
--- a/lib/Target/PowerPC/AsmPrinter/Makefile
+++ b/lib/Target/PowerPC/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMPowerPCAsmPrinter
 
 # Hack: we need to include 'main' PowerPC target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 3c7dfaf..ed6fc9d 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -34,6 +34,7 @@
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -60,9 +61,8 @@ namespace {
     uint64_t LabelID;
   public:
     explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                           MCContext &Ctx, MCStreamer &Streamer,
-                           const MCAsmInfo *T)
-      : AsmPrinter(O, TM, Ctx, Streamer, T),
+                           MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer),
         Subtarget(TM.getSubtarget<PPCSubtarget>()), LabelID(0) {}
 
     virtual const char *getPassName() const {
@@ -198,10 +198,11 @@ namespace {
           if (GV->isDeclaration() || GV->isWeakForLinker()) {
             // Dynamically-resolved functions need a stub for the function.
             MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
-            MCSymbol *&StubSym =
+            MachineModuleInfoImpl::StubValueTy &StubSym =
               MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-            if (StubSym == 0)
-              StubSym = GetGlobalValueSymbol(GV);
+            if (StubSym.getPointer() == 0)
+              StubSym = MachineModuleInfoImpl::
+                StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
             O << *Sym;
             return;
           }
@@ -212,10 +213,11 @@ namespace {
           TempNameStr += StringRef("$stub");
           
           MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
-          MCSymbol *&StubSym =
+          MachineModuleInfoImpl::StubValueTy &StubSym =
             MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-          if (StubSym == 0)
-            StubSym = GetExternalSymbolSymbol(MO.getSymbolName());
+          if (StubSym.getPointer() == 0)
+            StubSym = MachineModuleInfoImpl::
+              StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
           O << *Sym;
           return;
         }
@@ -300,10 +302,8 @@ namespace {
 
     void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo) {
       const MachineOperand &MO = MI->getOperand(OpNo);
-
       assert(MO.getType() == MachineOperand::MO_GlobalAddress);
-
-      const MCSymbol *Sym = GetGlobalValueSymbol(MO.getGlobal());
+      const MCSymbol *Sym = Mang->getSymbol(MO.getGlobal());
 
       // Map symbol -> label of TOC entry.
       const MCSymbol *&TOCEntry = TOC[Sym];
@@ -323,9 +323,8 @@ namespace {
   class PPCLinuxAsmPrinter : public PPCAsmPrinter {
   public:
     explicit PPCLinuxAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                                MCContext &Ctx, MCStreamer &Streamer,
-                                const MCAsmInfo *T)
-      : PPCAsmPrinter(O, TM, Ctx, Streamer, T) {}
+                                MCStreamer &Streamer)
+      : PPCAsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "Linux PPC Assembly Printer";
@@ -349,9 +348,8 @@ namespace {
     formatted_raw_ostream &OS;
   public:
     explicit PPCDarwinAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                                 MCContext &Ctx, MCStreamer &Streamer,
-                                 const MCAsmInfo *T)
-      : PPCAsmPrinter(O, TM, Ctx, Streamer, T), OS(O) {}
+                                 MCStreamer &Streamer)
+      : PPCAsmPrinter(O, TM, Streamer), OS(O) {}
 
     virtual const char *getPassName() const {
       return "Darwin PPC Assembly Printer";
@@ -380,7 +378,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
     llvm_unreachable("printOp() does not handle immediate values");
 
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_JumpTableIndex:
     O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
@@ -404,10 +402,11 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
     MCSymbol *NLPSym = 
       OutContext.GetOrCreateSymbol(StringRef(MAI->getGlobalPrefix())+
                                    MO.getSymbolName()+"$non_lazy_ptr");
-    MCSymbol *&StubSym = 
+    MachineModuleInfoImpl::StubValueTy &StubSym = 
       MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(NLPSym);
-    if (StubSym == 0)
-      StubSym = GetExternalSymbolSymbol(MO.getSymbolName());
+    if (StubSym.getPointer() == 0)
+      StubSym = MachineModuleInfoImpl::
+        StubValueTy(GetExternalSymbolSymbol(MO.getSymbolName()), true);
     
     O << *NLPSym;
     return;
@@ -422,24 +421,27 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
         (GV->isDeclaration() || GV->isWeakForLinker())) {
       if (!GV->hasHiddenVisibility()) {
         SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-        MCSymbol *&StubSym = 
-       MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(SymToPrint);
-        if (StubSym == 0)
-          StubSym = GetGlobalValueSymbol(GV);
+        MachineModuleInfoImpl::StubValueTy &StubSym = 
+          MMI->getObjFileInfo<MachineModuleInfoMachO>()
+            .getGVStubEntry(SymToPrint);
+        if (StubSym.getPointer() == 0)
+          StubSym = MachineModuleInfoImpl::
+            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
       } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
                  GV->hasAvailableExternallyLinkage()) {
         SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
         
-        MCSymbol *&StubSym = 
+        MachineModuleInfoImpl::StubValueTy &StubSym = 
           MMI->getObjFileInfo<MachineModuleInfoMachO>().
                     getHiddenGVStubEntry(SymToPrint);
-        if (StubSym == 0)
-          StubSym = GetGlobalValueSymbol(GV);
+        if (StubSym.getPointer() == 0)
+          StubSym = MachineModuleInfoImpl::
+            StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
       } else {
-        SymToPrint = GetGlobalValueSymbol(GV);
+        SymToPrint = Mang->getSymbol(GV);
       }
     } else {
-      SymToPrint = GetGlobalValueSymbol(GV);
+      SymToPrint = Mang->getSymbol(GV);
     }
     
     O << *SymToPrint;
@@ -704,7 +706,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
       EmitAlignment(4);
       
       const MCSymbol *Stub = Stubs[i].first;
-      const MCSymbol *RawSym = Stubs[i].second;
+      const MCSymbol *RawSym = Stubs[i].second.getPointer();
       const MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
       const MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
                                            
@@ -738,7 +740,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
                               16, SectionKind::getText());
   for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
     const MCSymbol *Stub = Stubs[i].first;
-    const MCSymbol *RawSym = Stubs[i].second;
+    const MCSymbol *RawSym = Stubs[i].second.getPointer();
     const MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
 
     OutStreamer.SwitchSection(StubSection);
@@ -781,8 +783,9 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
          E = Personalities.end(); I != E; ++I) {
       if (*I) {
         MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
-        MCSymbol *&StubSym = MMIMacho.getGVStubEntry(NLPSym);
-        StubSym = GetGlobalValueSymbol(*I);
+        MachineModuleInfoImpl::StubValueTy &StubSym =
+          MMIMacho.getGVStubEntry(NLPSym);
+        StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
       }
     }
   }
@@ -797,10 +800,24 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
     EmitAlignment(isPPC64 ? 3 : 2);
     
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-      O << *Stubs[i].first << ":\n";
-      O << "\t.indirect_symbol " << *Stubs[i].second << '\n';
-      O << (isPPC64 ? "\t.quad\t0\n" : "\t.long\t0\n");
+      // L_foo$non_lazy_ptr:
+      OutStreamer.EmitLabel(Stubs[i].first);
+      //   .indirect_symbol _foo
+      MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+      OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+      if (MCSym.getInt())
+        // External to current translation unit.
+        OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+      else
+        // Internal to current translation unit.
+        OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+                                                      OutContext),
+                              isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
     }
+
+    Stubs.clear();
+    OutStreamer.AddBlankLine();
   }
 
   Stubs = MMIMacho.GetHiddenGVStubList();
@@ -809,9 +826,17 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
     EmitAlignment(isPPC64 ? 3 : 2);
     
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-      O << *Stubs[i].first << ":\n";
-      O << (isPPC64 ? "\t.quad\t" : "\t.long\t") << *Stubs[i].second << '\n';
+      // L_foo$non_lazy_ptr:
+      OutStreamer.EmitLabel(Stubs[i].first);
+      //   .long _foo
+      OutStreamer.EmitValue(MCSymbolRefExpr::
+                            Create(Stubs[i].second.getPointer(),
+                                   OutContext),
+                            isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
     }
+
+    Stubs.clear();
+    OutStreamer.AddBlankLine();
   }
 
   // Funny Darwin hack: This flag tells the linker that no global symbols
@@ -824,21 +849,18 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   return AsmPrinter::doFinalization(M);
 }
 
-
-
 /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
 /// for a MachineFunction to the given output stream, in a format that the
 /// Darwin assembler can deal with.
 ///
 static AsmPrinter *createPPCAsmPrinterPass(formatted_raw_ostream &o,
                                            TargetMachine &tm,
-                                           MCContext &Ctx, MCStreamer &Streamer,
-                                           const MCAsmInfo *tai) {
+                                           MCStreamer &Streamer) {
   const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
 
   if (Subtarget->isDarwin())
-    return new PPCDarwinAsmPrinter(o, tm, Ctx, Streamer, tai);
-  return new PPCLinuxAsmPrinter(o, tm, Ctx, Streamer, tai);
+    return new PPCDarwinAsmPrinter(o, tm, Streamer);
+  return new PPCLinuxAsmPrinter(o, tm, Streamer);
 }
 
 // Force static initialization.
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 327470d..f7c27d4 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -30,6 +30,7 @@ namespace {
   class PPCCodeEmitter : public MachineFunctionPass {
     TargetMachine &TM;
     JITCodeEmitter &MCE;
+    MachineModuleInfo *MMI;
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addRequired<MachineModuleInfo>();
@@ -87,7 +88,8 @@ bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
           MF.getTarget().getRelocationModel() != Reloc::Static) &&
          "JIT relocation model must be set to static or default!");
 
-  MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+  MMI = &getAnalysis<MachineModuleInfo>();
+  MCE.setModuleInfo(MMI);
   do {
     MovePCtoLROffset = 0;
     MCE.startFunction(MF);
@@ -110,7 +112,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
       break;
     case TargetOpcode::DBG_LABEL:
     case TargetOpcode::EH_LABEL:
-      MCE.emitLabel(MI.getOperand(0).getImm());
+      MCE.emitLabel(MI.getOperand(0).getMCSymbol());
       break;
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index aeaa7c6..2c072c1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -13,9 +13,9 @@
 
 #include "PPCISelLowering.h"
 #include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
 #include "PPCPredicates.h"
 #include "PPCTargetMachine.h"
-#include "PPCPerfectShuffle.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/CodeGen/CallingConvLower.h"
@@ -60,10 +60,10 @@ cl::desc("enable preincrement load/store generation on PPC (experimental)"),
 static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
   if (TM.getSubtargetImpl()->isDarwin())
     return new TargetLoweringObjectFileMachO();
+
   return new TargetLoweringObjectFileELF();
 }
 
-
 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
 
@@ -4216,7 +4216,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
 /// lower, do it, otherwise return null.
 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                     SelectionDAG &DAG) {
+                                                   SelectionDAG &DAG) {
   // If this is a lowered altivec predicate compare, CompareOpc is set to the
   // opcode number of the comparison.
   DebugLoc dl = Op.getDebugLoc();
@@ -4228,8 +4228,8 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   // If this is a non-dot comparison, make the VCMP node and we are done.
   if (!isDot) {
     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
-                                Op.getOperand(1), Op.getOperand(2),
-                                DAG.getConstant(CompareOpc, MVT::i32));
+                              Op.getOperand(1), Op.getOperand(2),
+                              DAG.getConstant(CompareOpc, MVT::i32));
     return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
   }
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 31bca16..6e7880e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1287,7 +1287,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
        UnwindTablesMandatory;
   
   // Prepare for frame info.
-  unsigned FrameLabelId = 0;
+  MCSymbol *FrameLabel = 0;
 
   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
   // process it.
@@ -1446,33 +1446,33 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
   // reverse order.
   if (needsFrameMoves) {
     // Mark effective beginning of when frame pointer becomes valid.
-    FrameLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
+    FrameLabel = MMI->getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel);
   
     // Show update of SP.
     if (NegFrameSize) {
       MachineLocation SPDst(MachineLocation::VirtualFP);
       MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
-      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
     } else {
       MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabelId, SP, SP));
+      Moves.push_back(MachineMove(FrameLabel, SP, SP));
     }
     
     if (HasFP) {
       MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
       MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
-      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
     }
 
     if (MustSaveLR) {
       MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
       MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
-      Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
+      Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
     }
   }
 
-  unsigned ReadyLabelId = 0;
+  MCSymbol *ReadyLabel = 0;
 
   // If there is a frame pointer, copy R1 into R31
   if (HasFP) {
@@ -1487,20 +1487,20 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
 
     if (needsFrameMoves) {
-      ReadyLabelId = MMI->NextLabelID();
+      ReadyLabel = MMI->getContext().CreateTempSymbol();
 
       // Mark effective beginning of when frame pointer is ready.
-      BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel);
 
       MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
                                     (isPPC64 ? PPC::X1 : PPC::R1));
       MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+      Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
     }
   }
 
   if (needsFrameMoves) {
-    unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId;
+    MCSymbol *Label = HasFP ? ReadyLabel : FrameLabel;
 
     // Add callee saved registers to move list.
     const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -1510,7 +1510,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
       MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
       MachineLocation CSSrc(Reg);
-      Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
+      Moves.push_back(MachineMove(Label, CSDst, CSSrc));
     }
   }
 }
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4fd46a8..052a575 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1819,3 +1819,27 @@ int test2(int mainType, int subType) {
 }
 
 //===---------------------------------------------------------------------===//
+
+The following test case (from PR6576):
+
+define i32 @mul(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %cond1 = icmp eq i32 %b, 0                      ; <i1> [#uses=1]
+ br i1 %cond1, label %exit, label %bb.nph
+bb.nph:                                           ; preds = %entry
+ %tmp = mul i32 %b, %a                           ; <i32> [#uses=1]
+ ret i32 %tmp
+exit:                                             ; preds = %entry
+ ret i32 0
+}
+
+could be reduced to:
+
+define i32 @mul(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %tmp = mul i32 %b, %a
+ ret i32 %tmp
+}
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/Sparc/AsmPrinter/Makefile b/lib/Target/Sparc/AsmPrinter/Makefile
index a856828..fe47538 100644
--- a/lib/Target/Sparc/AsmPrinter/Makefile
+++ b/lib/Target/Sparc/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMSparcAsmPrinter
 
 # Hack: we need to include 'main' Sparc target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index f6753a6..3e40dbb 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -21,6 +21,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormattedStream.h"
@@ -30,9 +31,8 @@ namespace {
   class SparcAsmPrinter : public AsmPrinter {
   public:
     explicit SparcAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                             MCContext &Ctx, MCStreamer &Streamer,
-                             const MCAsmInfo *T)
-      : AsmPrinter(O, TM, Ctx, Streamer, T) {}
+                             MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "Sparc Assembly Printer";
@@ -84,10 +84,10 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     O << (int)MO.getImm();
     break;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress:
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     break;
   case MachineOperand::MO_ExternalSymbol:
     O << MO.getSymbolName();
diff --git a/lib/Target/SystemZ/AsmPrinter/Makefile b/lib/Target/SystemZ/AsmPrinter/Makefile
index 9a350df..0f90ed3 100644
--- a/lib/Target/SystemZ/AsmPrinter/Makefile
+++ b/lib/Target/SystemZ/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMSystemZAsmPrinter
 
 # Hack: we need to include 'main' SystemZ target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 7a9e8dd..b29dfcd 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -40,9 +41,8 @@ namespace {
   class SystemZAsmPrinter : public AsmPrinter {
   public:
     SystemZAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                      MCContext &Ctx, MCStreamer &Streamer,
-                      const MCAsmInfo *MAI)
-      : AsmPrinter(O, TM, Ctx, Streamer, MAI) {}
+                      MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer) {}
 
     virtual const char *getPassName() const {
       return "SystemZ Assembly Printer";
@@ -89,11 +89,11 @@ void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum){
     O << MO.getImm();
     return;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
-    O << *GetGlobalValueSymbol(GV);
+    O << *Mang->getSymbol(GV);
 
     // Assemble calls via PLT for externally visible symbols if PIC.
     if (TM.getRelocationModel() == Reloc::PIC_ &&
@@ -144,7 +144,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     O << MO.getImm();
     return;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_JumpTableIndex:
     O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
@@ -158,7 +158,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
     printOffset(MO.getOffset());
     break;
   case MachineOperand::MO_GlobalAddress:
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     break;
   case MachineOperand::MO_ExternalSymbol: {
     O << *GetExternalSymbolSymbol(MO.getSymbolName());
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 8c12039..a093e2d 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -19,6 +19,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
@@ -290,45 +291,38 @@ TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
   return DataSection;
 }
 
-/// getSymbolForDwarfGlobalReference - Return an MCExpr to use for a
+/// getExprForDwarfGlobalReference - Return an MCExpr to use for a
 /// reference to the specified global variable from exception
 /// handling information.
 const MCExpr *TargetLoweringObjectFile::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                             MachineModuleInfo *MMI, unsigned Encoding) const {
-  // FIXME: Use GetGlobalValueSymbol.
-  SmallString<128> Name;
-  Mang->getNameWithPrefix(Name, GV, false);
-  const MCSymbol *Sym;
-  
-  if (GV->hasPrivateLinkage())
-    Sym = getContext().GetOrCreateTemporarySymbol(Name.str());
-  else
-    Sym = getContext().GetOrCreateSymbol(Name.str());
-
-  return getSymbolForDwarfReference(Sym, MMI, Encoding);
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI, unsigned Encoding,
+                               MCStreamer &Streamer) const {
+  const MCSymbol *Sym = Mang->getSymbol(GV);
+  return getExprForDwarfReference(Sym, Mang, MMI, Encoding, Streamer);
 }
 
 const MCExpr *TargetLoweringObjectFile::
-getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI,
-                           unsigned Encoding) const {
+getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
+                         MachineModuleInfo *MMI, unsigned Encoding,
+                         MCStreamer &Streamer) const {
   const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext());
 
   switch (Encoding & 0xF0) {
   default:
     llvm_report_error("We do not support this DWARF encoding yet!");
-    break;
   case dwarf::DW_EH_PE_absptr:
     // Do nothing special
-    break;
-  case dwarf::DW_EH_PE_pcrel:
-    // FIXME: PCSymbol
-    const MCExpr *PC = MCSymbolRefExpr::Create(".", getContext());
-    Res = MCBinaryExpr::CreateSub(Res, PC, getContext());
-    break;
+    return Res;
+  case dwarf::DW_EH_PE_pcrel: {
+    // Emit a label to the streamer for the current position.  This gives us
+    // foo-. (symbol minus current position) addressing.
+    MCSymbol *PCSym = getContext().GetOrCreateTemporarySymbol();
+    Streamer.EmitLabel(PCSym);
+    const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+    return MCBinaryExpr::CreateSub(Res, PC, getContext());
+  }
   }
-
-  return Res;
 }
 
 unsigned TargetLoweringObjectFile::getPersonalityEncoding() const {
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile
index 25fb0a2..fb97607 100644
--- a/lib/Target/X86/AsmParser/Makefile
+++ b/lib/Target/X86/AsmParser/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86AsmParser
 
 # Hack: we need to include 'main' x86 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 84d7bb7..dde86fb 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -542,6 +542,15 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
     }
   }
 
+  // FIXME: Hack to handle recognizing s{hr,ar,hl}? $1.
+  if ((Name.startswith("shr") || Name.startswith("sar") ||
+       Name.startswith("shl")) &&
+      Operands.size() == 3 &&
+      static_cast<X86Operand*>(Operands[1])->isImm() &&
+      isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) &&
+      cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1)
+    Operands.erase(Operands.begin() + 1);
+
   return false;
 }
 
diff --git a/lib/Target/X86/AsmPrinter/Makefile b/lib/Target/X86/AsmPrinter/Makefile
index 2368761..c82aa33 100644
--- a/lib/Target/X86/AsmPrinter/Makefile
+++ b/lib/Target/X86/AsmPrinter/Makefile
@@ -10,6 +10,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86AsmPrinter
 
 # Hack: we need to include 'main' x86 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index caf84b6..c3dcf8e 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -53,30 +53,6 @@ void X86AsmPrinter::PrintPICBaseSymbol() const {
                                                                     OutContext);
 }
 
-MCSymbol *X86AsmPrinter::GetGlobalValueSymbol(const GlobalValue *GV) const {
-  SmallString<60> NameStr;
-  Mang->getNameWithPrefix(NameStr, GV, false);
-  MCSymbol *Symb;
-  if (GV->hasPrivateLinkage())
-    Symb = OutContext.GetOrCreateTemporarySymbol(NameStr.str());
-  else
-    Symb = OutContext.GetOrCreateSymbol(NameStr.str());
-
-  if (Subtarget->isTargetCygMing()) {
-    X86COFFMachineModuleInfo &COFFMMI =
-      MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-    COFFMMI.DecorateCygMingName(Symb, OutContext, GV, *TM.getTargetData());
-
-    // Save function name for later type emission.
-    if (const Function *F = dyn_cast<Function>(GV))
-      if (F->isDeclaration())
-        COFFMMI.addExternalFunction(Symb->getName());
-
-  }
-
-  return Symb;
-}
-
 /// runOnMachineFunction - Emit the function body.
 ///
 bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -124,7 +100,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
              MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
       GVSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
     else
-      GVSym = GetGlobalValueSymbol(GV);
+      GVSym = Mang->getSymbol(GV);
 
     // Handle dllimport linkage.
     if (MO.getTargetFlags() == X86II::MO_DLLIMPORT)
@@ -133,24 +109,25 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
     if (MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY_PIC_BASE) {
       MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      
-      MCSymbol *&StubSym = 
+      MachineModuleInfoImpl::StubValueTy &StubSym = 
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
-      if (StubSym == 0)
-        StubSym = GetGlobalValueSymbol(GV);
-      
+      if (StubSym.getPointer() == 0)
+        StubSym = MachineModuleInfoImpl::
+          StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
     } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
       MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
-      MCSymbol *&StubSym =
+      MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(Sym);
-      if (StubSym == 0)
-        StubSym = GetGlobalValueSymbol(GV);
+      if (StubSym.getPointer() == 0)
+        StubSym = MachineModuleInfoImpl::
+          StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
     } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
       MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub");
-      MCSymbol *&StubSym =
+      MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-      if (StubSym == 0)
-        StubSym = GetGlobalValueSymbol(GV);
+      if (StubSym.getPointer() == 0)
+        StubSym = MachineModuleInfoImpl::
+          StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
     }
     
     // If the name begins with a dollar-sign, enclose it in parens.  We do this
@@ -170,13 +147,15 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO) {
       TempNameStr += StringRef("$stub");
       
       MCSymbol *Sym = GetExternalSymbolSymbol(TempNameStr.str());
-      MCSymbol *&StubSym =
+      MachineModuleInfoImpl::StubValueTy &StubSym =
         MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
-      if (StubSym == 0) {
+      if (StubSym.getPointer() == 0) {
         TempNameStr.erase(TempNameStr.end()-5, TempNameStr.end());
-        StubSym = OutContext.GetOrCreateSymbol(TempNameStr.str());
+        StubSym = MachineModuleInfoImpl::
+          StubValueTy(OutContext.GetOrCreateSymbol(TempNameStr.str()),
+                      true);
       }
-      SymToPrint = StubSym;
+      SymToPrint = StubSym.getPointer();
     } else {
       SymToPrint = GetExternalSymbolSymbol(MO.getSymbolName());
     }
@@ -235,7 +214,7 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     O << MO.getImm();
     return;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     return;
   case MachineOperand::MO_GlobalAddress:
   case MachineOperand::MO_ExternalSymbol:
@@ -480,6 +459,11 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
+void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
+  if (Subtarget->isTargetDarwin())
+    OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
 
 void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
   if (Subtarget->isTargetDarwin()) {
@@ -507,7 +491,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         // L_foo$stub:
         OutStreamer.EmitLabel(Stubs[i].first);
         //   .indirect_symbol _foo
-        OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol);
+        OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+                                        MCSA_IndirectSymbol);
         // hlt; hlt; hlt; hlt; hlt     hlt = 0xf4 = -12.
         const char HltInsts[] = { -12, -12, -12, -12, -12 };
         OutStreamer.EmitBytes(StringRef(HltInsts, 5), 0/*addrspace*/);
@@ -530,9 +515,18 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         // L_foo$non_lazy_ptr:
         OutStreamer.EmitLabel(Stubs[i].first);
         // .indirect_symbol _foo
-        OutStreamer.EmitSymbolAttribute(Stubs[i].second, MCSA_IndirectSymbol);
+        MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+        OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
+                                        MCSA_IndirectSymbol);
         // .long 0
-        OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+        if (MCSym.getInt())
+          // External to current translation unit.
+          OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+        else
+          // Internal to current translation unit.
+          OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+                                                        OutContext),
+                                4/*size*/, 0/*addrspace*/);
       }
       Stubs.clear();
       OutStreamer.AddBlankLine();
@@ -547,8 +541,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         // L_foo$non_lazy_ptr:
         OutStreamer.EmitLabel(Stubs[i].first);
         // .long _foo
-        OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second,
-                                                      OutContext),
+        OutStreamer.EmitValue(MCSymbolRefExpr::
+                              Create(Stubs[i].second.getPointer(),
+                                     OutContext),
                               4/*size*/, 0/*addrspace*/);
       }
       Stubs.clear();
@@ -584,15 +579,13 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering());
 
       for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
-        if (I->hasDLLExportLinkage()) {
-          MCSymbol *Sym = GetGlobalValueSymbol(I);
-          DLLExportedFns.push_back(Sym);
-        }
+        if (I->hasDLLExportLinkage())
+          DLLExportedFns.push_back(Mang->getSymbol(I));
 
       for (Module::const_global_iterator I = M.global_begin(),
              E = M.global_end(); I != E; ++I)
         if (I->hasDLLExportLinkage())
-          DLLExportedGlobals.push_back(GetGlobalValueSymbol(I));
+          DLLExportedGlobals.push_back(Mang->getSymbol(I));
 
       // Output linker support code for dllexported globals on windows.
       if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
@@ -624,7 +617,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
         O << *Stubs[i].first << ":\n"
           << (TD->getPointerSize() == 8 ?
               MAI->getData64bitsDirective() : MAI->getData32bitsDirective())
-          << *Stubs[i].second << '\n';
+          << *Stubs[i].second.getPointer() << '\n';
 
       Stubs.clear();
     }
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 039214a..28c25f9 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -36,9 +36,8 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
   const X86Subtarget *Subtarget;
  public:
   explicit X86AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                         MCContext &Ctx, MCStreamer &Streamer,
-                         const MCAsmInfo *T)
-    : AsmPrinter(O, TM, Ctx, Streamer, T) {
+                         MCStreamer &Streamer)
+    : AsmPrinter(O, TM, Streamer) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
   }
 
@@ -55,13 +54,13 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
     AsmPrinter::getAnalysisUsage(AU);
   }
 
-  
+  virtual void EmitStartOfAsmFile(Module &M);
+
   virtual void EmitEndOfAsmFile(Module &M);
   
   virtual void EmitInstruction(const MachineInstr *MI);
   
   void printSymbolOperand(const MachineOperand &MO);
-  virtual MCSymbol *GetGlobalValueSymbol(const GlobalValue *GV) const;
 
   // These methods are used by the tablegen'erated instruction printer.
   void printOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index b8a6eeb..cbfc57a 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -16,13 +16,13 @@
 #include "X86AsmPrinter.h"
 #include "X86COFFMachineModuleInfo.h"
 #include "X86MCAsmInfo.h"
-#include "X86MCTargetExpr.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/SmallString.h"
@@ -54,7 +54,12 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
 
   SmallString<128> Name;
   
-  if (MO.isGlobal()) {
+  if (!MO.isGlobal()) {
+    assert(MO.isSymbol());
+    Name += AsmPrinter.MAI->getGlobalPrefix();
+    Name += MO.getSymbolName();
+  } else {    
+    const GlobalValue *GV = MO.getGlobal();
     bool isImplicitlyPrivate = false;
     if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
         MO.getTargetFlags() == X86II::MO_DARWIN_NONLAZY ||
@@ -62,18 +67,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
         MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE)
       isImplicitlyPrivate = true;
     
-    const GlobalValue *GV = MO.getGlobal();
     Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
-  
-    if (getSubtarget().isTargetCygMing()) {
-      X86COFFMachineModuleInfo &COFFMMI = 
-        AsmPrinter.MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-      COFFMMI.DecorateCygMingName(Name, GV, *AsmPrinter.TM.getTargetData());
-    }
-  } else {
-    assert(MO.isSymbol());
-    Name += AsmPrinter.MAI->getGlobalPrefix();
-    Name += MO.getSymbolName();
   }
 
   // If the target flags on the operand changes the name of the symbol, do that
@@ -91,35 +85,49 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
     Name += "$non_lazy_ptr";
     MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
 
-    MCSymbol *&StubSym = getMachOMMI().getGVStubEntry(Sym);
-    if (StubSym == 0) {
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      getMachOMMI().getGVStubEntry(Sym);
+    if (StubSym.getPointer() == 0) {
       assert(MO.isGlobal() && "Extern symbol not handled yet");
-      StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal());
+      StubSym =
+        MachineModuleInfoImpl::
+        StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+                    !MO.getGlobal()->hasInternalLinkage());
     }
     return Sym;
   }
   case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
     Name += "$non_lazy_ptr";
     MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
-    MCSymbol *&StubSym = getMachOMMI().getHiddenGVStubEntry(Sym);
-    if (StubSym == 0) {
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      getMachOMMI().getHiddenGVStubEntry(Sym);
+    if (StubSym.getPointer() == 0) {
       assert(MO.isGlobal() && "Extern symbol not handled yet");
-      StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal());
+      StubSym =
+        MachineModuleInfoImpl::
+        StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+                    !MO.getGlobal()->hasInternalLinkage());
     }
     return Sym;
   }
   case X86II::MO_DARWIN_STUB: {
     Name += "$stub";
     MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
-    MCSymbol *&StubSym = getMachOMMI().getFnStubEntry(Sym);
-    if (StubSym)
+    MachineModuleInfoImpl::StubValueTy &StubSym =
+      getMachOMMI().getFnStubEntry(Sym);
+    if (StubSym.getPointer())
       return Sym;
     
     if (MO.isGlobal()) {
-      StubSym = AsmPrinter.GetGlobalValueSymbol(MO.getGlobal());
+      StubSym =
+        MachineModuleInfoImpl::
+        StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+                    !MO.getGlobal()->hasInternalLinkage());
     } else {
       Name.erase(Name.end()-5, Name.end());
-      StubSym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+      StubSym =
+        MachineModuleInfoImpl::
+        StubValueTy(Ctx.GetOrCreateTemporarySymbol(Name.str()), false);
     }
     return Sym;
   }
@@ -133,7 +141,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   // FIXME: We would like an efficient form for this, so we don't have to do a
   // lot of extra uniquing.
   const MCExpr *Expr = 0;
-  X86MCTargetExpr::VariantKind RefKind = X86MCTargetExpr::Invalid;
+  MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
   
   switch (MO.getTargetFlags()) {
   default: llvm_unreachable("Unknown target flag on GV operand");
@@ -144,15 +152,15 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
   case X86II::MO_DARWIN_STUB:
     break;
       
-  case X86II::MO_TLSGD:     RefKind = X86MCTargetExpr::TLSGD; break;
-  case X86II::MO_GOTTPOFF:  RefKind = X86MCTargetExpr::GOTTPOFF; break;
-  case X86II::MO_INDNTPOFF: RefKind = X86MCTargetExpr::INDNTPOFF; break;
-  case X86II::MO_TPOFF:     RefKind = X86MCTargetExpr::TPOFF; break;
-  case X86II::MO_NTPOFF:    RefKind = X86MCTargetExpr::NTPOFF; break;
-  case X86II::MO_GOTPCREL:  RefKind = X86MCTargetExpr::GOTPCREL; break;
-  case X86II::MO_GOT:       RefKind = X86MCTargetExpr::GOT; break;
-  case X86II::MO_GOTOFF:    RefKind = X86MCTargetExpr::GOTOFF; break;
-  case X86II::MO_PLT:       RefKind = X86MCTargetExpr::PLT; break;
+  case X86II::MO_TLSGD:     RefKind = MCSymbolRefExpr::VK_TLSGD; break;
+  case X86II::MO_GOTTPOFF:  RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
+  case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
+  case X86II::MO_TPOFF:     RefKind = MCSymbolRefExpr::VK_TPOFF; break;
+  case X86II::MO_NTPOFF:    RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
+  case X86II::MO_GOTPCREL:  RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
+  case X86II::MO_GOT:       RefKind = MCSymbolRefExpr::VK_GOT; break;
+  case X86II::MO_GOTOFF:    RefKind = MCSymbolRefExpr::VK_GOTOFF; break;
+  case X86II::MO_PLT:       RefKind = MCSymbolRefExpr::VK_PLT; break;
   case X86II::MO_PIC_BASE_OFFSET:
   case X86II::MO_DARWIN_NONLAZY_PIC_BASE:
   case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE:
@@ -164,12 +172,8 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
     break;
   }
   
-  if (Expr == 0) {
-    if (RefKind == X86MCTargetExpr::Invalid)
-      Expr = MCSymbolRefExpr::Create(Sym, Ctx);
-    else
-      Expr = X86MCTargetExpr::Create(Sym, RefKind, Ctx);
-  }
+  if (Expr == 0)
+    Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
   
   if (!MO.isJTI() && MO.getOffset())
     Expr = MCBinaryExpr::CreateAdd(Expr,
@@ -233,7 +237,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
       break;
     case MachineOperand::MO_MachineBasicBlock:
       MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-                       MO.getMBB()->getSymbol(Ctx), Ctx));
+                       MO.getMBB()->getSymbol(), Ctx));
       break;
     case MachineOperand::MO_GlobalAddress:
       MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
@@ -294,6 +298,29 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
     LowerSubReg32_Op0(OutMI, X86::MOV32r0);   // MOV64r0 -> MOV32r0
     LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
     break;
+      
+      
+  // The assembler backend wants to see branches in their small form and relax
+  // them to their large form.  The JIT can only handle the large form because
+  // it does not do relaxation.  For now, translate the large form to the
+  // small one here.
+  case X86::JMP_4: OutMI.setOpcode(X86::JMP_1); break;
+  case X86::JO_4:  OutMI.setOpcode(X86::JO_1); break;
+  case X86::JNO_4: OutMI.setOpcode(X86::JNO_1); break;
+  case X86::JB_4:  OutMI.setOpcode(X86::JB_1); break;
+  case X86::JAE_4: OutMI.setOpcode(X86::JAE_1); break;
+  case X86::JE_4:  OutMI.setOpcode(X86::JE_1); break;
+  case X86::JNE_4: OutMI.setOpcode(X86::JNE_1); break;
+  case X86::JBE_4: OutMI.setOpcode(X86::JBE_1); break;
+  case X86::JA_4:  OutMI.setOpcode(X86::JA_1); break;
+  case X86::JS_4:  OutMI.setOpcode(X86::JS_1); break;
+  case X86::JNS_4: OutMI.setOpcode(X86::JNS_1); break;
+  case X86::JP_4:  OutMI.setOpcode(X86::JP_1); break;
+  case X86::JNP_4: OutMI.setOpcode(X86::JNP_1); break;
+  case X86::JL_4:  OutMI.setOpcode(X86::JL_1); break;
+  case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
+  case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
+  case X86::JG_4:  OutMI.setOpcode(X86::JG_1); break;
   }
 }
 
@@ -344,6 +371,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
       } else
         printOperand(MI, 0);
     } else {
+      if (MI->getOperand(0).getType()==MachineOperand::MO_Register &&
+          MI->getOperand(0).getReg()==0) {
+        // Suppress offset in this case, it is not meaningful.
+        O << "undef";
+        OutStreamer.AddBlankLine();
+        return;
+      }
       // Frame address.  Currently handles register +- offset only.
       assert(MI->getOperand(0).getType()==MachineOperand::MO_Register);
       assert(MI->getOperand(3).getType()==MachineOperand::MO_Immediate);
@@ -392,11 +426,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // For this, we want to print something like:
     //   MYGLOBAL + (. - PICBASE)
     // However, we can't generate a ".", so just emit a new label here and refer
-    // to it.  We know that this operand flag occurs at most once per function.
-    const char *Prefix = MAI->getPrivateGlobalPrefix();
-    MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol(Twine(Prefix)+
-                                                             "picbaseref" +
-                                                    Twine(getFunctionNumber()));
+    // to it.
+    MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol();
     OutStreamer.EmitLabel(DotSym);
     
     // Now that we have emitted the label, lower the complex operand expression.
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index eed3b45..4d3dedf 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -27,7 +27,6 @@ set(sources
   X86JITInfo.cpp
   X86MCAsmInfo.cpp
   X86MCCodeEmitter.cpp 
-  X86MCTargetExpr.cpp
   X86RegisterInfo.cpp
   X86Subtarget.cpp
   X86TargetMachine.cpp
diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile
index b289647..8669fd8 100644
--- a/lib/Target/X86/Disassembler/Makefile
+++ b/lib/Target/X86/Disassembler/Makefile
@@ -11,6 +11,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86Disassembler
 
 # Hack: we need to include 'main' x86 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/TargetInfo/Makefile b/lib/Target/X86/TargetInfo/Makefile
index 9858e6a..ee91982 100644
--- a/lib/Target/X86/TargetInfo/Makefile
+++ b/lib/Target/X86/TargetInfo/Makefile
@@ -11,6 +11,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86Info
 
 # Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index ba0ee6c..c753cf2 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -21,7 +21,6 @@ namespace llvm {
 
 class FunctionPass;
 class JITCodeEmitter;
-class MCAssembler;
 class MCCodeEmitter;
 class MCContext;
 class MachineCodeEmitter;
@@ -57,8 +56,8 @@ MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
 MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
                                          MCContext &Ctx);
 
-TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &);
-TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &);
+TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
 
 /// createX86EmitCodeToMemory - Returns a pass that converts a register
 /// allocated function into raw machine code in a dynamically
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7919559..6a4bdb5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -55,7 +55,8 @@ def Feature3DNowA  : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
 // feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
 // without disabling 64-bit mode.
 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
-                                      "Support 64-bit instructions">;
+                                      "Support 64-bit instructions",
+                                      [FeatureCMOV]>;
 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
                                        "Bit testing of memory is slow">;
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index e6654ef..a44afc6 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Target/TargetAsmBackend.h"
 #include "X86.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Target/TargetAsmBackend.h"
 using namespace llvm;
@@ -17,18 +18,63 @@ namespace {
 
 class X86AsmBackend : public TargetAsmBackend {
 public:
-  X86AsmBackend(const Target &T, MCAssembler &A)
+  X86AsmBackend(const Target &T)
     : TargetAsmBackend(T) {}
 };
 
+class DarwinX86AsmBackend : public X86AsmBackend {
+public:
+  DarwinX86AsmBackend(const Target &T)
+    : X86AsmBackend(T) {}
+
+  virtual bool hasAbsolutizedSet() const { return true; }
+
+  virtual bool hasScatteredSymbols() const { return true; }
+};
+
+class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
+public:
+  DarwinX86_32AsmBackend(const Target &T)
+    : DarwinX86AsmBackend(T) {}
+};
+
+class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
+public:
+  DarwinX86_64AsmBackend(const Target &T)
+    : DarwinX86AsmBackend(T) {}
+
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+    // Temporary labels in the string literals sections require symbols. The
+    // issue is that the x86_64 relocation format does not allow symbol +
+    // offset, and so the linker does not have enough information to resolve the
+    // access to the appropriate atom unless an external relocation is used. For
+    // non-cstring sections, we expect the compiler to use a non-temporary label
+    // for anything that could have an addend pointing outside the symbol.
+    //
+    // See <rdar://problem/4765733>.
+    const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
+  }
+};
+
 }
 
 TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
-                                               MCAssembler &A) {
-  return new X86AsmBackend(T, A);
+                                               const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return new DarwinX86_32AsmBackend(T);
+  default:
+    return new X86AsmBackend(T);
+  }
 }
 
 TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
-                                               MCAssembler &A) {
-  return new X86AsmBackend(T, A);
+                                               const std::string &TT) {
+  switch (Triple(TT).getOS()) {
+  case Triple::Darwin:
+    return new DarwinX86_64AsmBackend(T);
+  default:
+    return new X86AsmBackend(T);
+  }
 }
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index ab67acb..4326814 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -12,80 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86COFFMachineModuleInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-X86COFFMachineModuleInfo::X86COFFMachineModuleInfo(const MachineModuleInfo &) {
-}
-X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
-}
-
-void X86COFFMachineModuleInfo::addExternalFunction(const StringRef& Name) {
-  CygMingStubs.insert(Name);
-}
-
-/// DecorateCygMingName - Apply various name decorations if the function uses
-/// stdcall or fastcall calling convention.
-void X86COFFMachineModuleInfo::DecorateCygMingName(SmallVectorImpl<char> &Name,
-                                                   const GlobalValue *GV,
-                                                   const TargetData &TD) {
-  const Function *F = dyn_cast<Function>(GV);
-  if (!F) return;
-
-  // We don't want to decorate non-stdcall or non-fastcall functions right now
-  CallingConv::ID CC = F->getCallingConv();
-  if (CC != CallingConv::X86_StdCall && CC != CallingConv::X86_FastCall)
-    return;
-
-  unsigned ArgWords = 0;
-  DenseMap<const Function*, unsigned>::const_iterator item = FnArgWords.find(F);
-  if (item == FnArgWords.end()) {
-    // Calculate arguments sizes
-    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-         AI != AE; ++AI) {
-      const Type* Ty = AI->getType();
 
-      // 'Dereference' type in case of byval parameter attribute
-      if (AI->hasByValAttr())
-        Ty = cast<PointerType>(Ty)->getElementType();
-
-      // Size should be aligned to DWORD boundary
-      ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
-    }
-
-    FnArgWords[F] = ArgWords;
-  } else
-    ArgWords = item->second;
-
-  const FunctionType *FT = F->getFunctionType();
-  // "Pure" variadic functions do not receive @0 suffix.
-  if (!FT->isVarArg() || FT->getNumParams() == 0 ||
-      (FT->getNumParams() == 1 && F->hasStructRetAttr()))
-    raw_svector_ostream(Name) << '@' << ArgWords;
-
-  if (CC == CallingConv::X86_FastCall) {
-    if (Name[0] == '_')
-      Name[0] = '@';
-    else
-      Name.insert(Name.begin(), '@');
-  }
+X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
 }
 
-/// DecorateCygMingName - Query FunctionInfoMap and use this information for
-/// various name decorations for Cygwin and MingW.
-void X86COFFMachineModuleInfo::DecorateCygMingName(MCSymbol *&Name,
-                                                   MCContext &Ctx,
-                                                   const GlobalValue *GV,
-                                                   const TargetData &TD) {
-  SmallString<128> NameStr(Name->getName().begin(), Name->getName().end());
-  DecorateCygMingName(NameStr, GV, TD);
-
-  Name = Ctx.GetOrCreateSymbol(NameStr.str());
-}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 9de3dcd..eece462 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -26,17 +26,14 @@ namespace llvm {
 /// for X86 COFF targets.
 class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
   StringSet<> CygMingStubs;
-  DenseMap<const Function*, unsigned> FnArgWords;
 public:
-  X86COFFMachineModuleInfo(const MachineModuleInfo &);
-  ~X86COFFMachineModuleInfo();
+  X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
+  virtual ~X86COFFMachineModuleInfo();
 
-  void DecorateCygMingName(MCSymbol* &Name, MCContext &Ctx,
-                           const GlobalValue *GV, const TargetData &TD);
-  void DecorateCygMingName(SmallVectorImpl<char> &Name, const GlobalValue *GV,
-                           const TargetData &TD);
-
-  void addExternalFunction(const StringRef& Name);
+  void addExternalFunction(StringRef Name) {
+    CygMingStubs.insert(Name);
+  }
+    
   typedef StringSet<>::const_iterator stub_iterator;
   stub_iterator stub_begin() const { return CygMingStubs.begin(); }
   stub_iterator stub_end() const { return CygMingStubs.end(); }
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 12d3d04..fd15efd 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -221,6 +221,20 @@ def CC_X86_Win64_C : CallingConv<[
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
 
+def CC_X86_64_GHC : CallingConv<[
+  // Promote i8/i16/i32 arguments to i64.
+  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
+  CCIfType<[i64],
+            CCAssignToReg<[R13, RBP, R12, RBX, R14, RSI, RDI, R8, R9, R15]>>,
+
+  // Pass in STG registers: F1, F2, F3, F4, D1, D2
+  CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+            CCIfSubtarget<"hasSSE1()",
+            CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
+]>;
+
 //===----------------------------------------------------------------------===//
 // X86 C Calling Convention
 //===----------------------------------------------------------------------===//
@@ -320,3 +334,11 @@ def CC_X86_32_FastCC : CallingConv<[
   // Otherwise, same as everything else.
   CCDelegateTo<CC_X86_32_Common>
 ]>;
+
+def CC_X86_32_GHC : CallingConv<[
+  // Promote i8/i16 arguments to i32.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Pass in STG registers: Base, Sp, Hp, R1
+  CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
+]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 8deadf6..6638e11 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -46,6 +46,7 @@ namespace {
     const TargetData    *TD;
     X86TargetMachine    &TM;
     CodeEmitter         &MCE;
+    MachineModuleInfo   *MMI;
     intptr_t PICBaseOffset;
     bool Is64BitMode;
     bool IsPIC;
@@ -115,8 +116,8 @@ FunctionPass *llvm::createX86JITCodeEmitterPass(X86TargetMachine &TM,
 
 template<class CodeEmitter>
 bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
- 
-  MCE.setModuleInfo(&getAnalysis<MachineModuleInfo>());
+  MMI = &getAnalysis<MachineModuleInfo>();
+  MCE.setModuleInfo(MMI);
   
   II = TM.getInstrInfo();
   TD = TM.getTargetData();
@@ -602,10 +603,11 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
         llvm_report_error("JIT does not support inline asm!");
       break;
     case TargetOpcode::DBG_LABEL:
-    case TargetOpcode::EH_LABEL:
     case TargetOpcode::GC_LABEL:
-      MCE.emitLabel(MI.getOperand(0).getImm());
+    case TargetOpcode::EH_LABEL:
+      MCE.emitLabel(MI.getOperand(0).getMCSymbol());
       break;
+        
     case TargetOpcode::IMPLICIT_DEF:
     case TargetOpcode::KILL:
     case X86::FP_REG_KILL:
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 98e3f4e..96b652d 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -172,7 +172,9 @@ bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
 CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                            bool isTaillCall) {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -182,6 +184,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
     return CC_X86_32_FastCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
+  else if (CC == CallingConv::GHC)
+    return CC_X86_32_GHC;
   else
     return CC_X86_32_C;
 }
@@ -1162,6 +1166,30 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
   // FIXME: Handle more intrinsics.
   switch (I.getIntrinsicID()) {
   default: return false;
+  case Intrinsic::objectsize: {
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+    const Type *Ty = I.getCalledFunction()->getReturnType();
+    
+    assert(CI && "Non-constant type in Intrinsic::objectsize?");
+    
+    EVT VT;
+    if (!isTypeLegal(Ty, VT))
+      return false;
+    
+    unsigned OpC = 0;
+    if (VT == MVT::i32)
+      OpC = X86::MOV32ri;
+    else if (VT == MVT::i64)
+      OpC = X86::MOV64ri;
+    else
+      return false;
+    
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(MBB, DL, TII.get(OpC), ResultReg).
+                                  addImm(CI->getZExtValue() == 0 ? -1ULL : 0);
+    UpdateValueMap(&I, ResultReg);
+    return true;
+  }
   case Intrinsic::dbg_declare: {
     DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
     X86AddressMode AM;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3fad8ad..4058885 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -349,17 +349,17 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
   return true;
 }
 
-/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
-/// operand and move load below the call's chain operand.
-static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
-                                  SDValue Call, SDValue CallSeqStart) {
+/// MoveBelowOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+                                  SDValue Call, SDValue OrigChain) {
   SmallVector<SDValue, 8> Ops;
-  SDValue Chain = CallSeqStart.getOperand(0);
+  SDValue Chain = OrigChain.getOperand(0);
   if (Chain.getNode() == Load.getNode())
     Ops.push_back(Load.getOperand(0));
   else {
     assert(Chain.getOpcode() == ISD::TokenFactor &&
-           "Unexpected CallSeqStart chain operand");
+           "Unexpected chain operand");
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
       if (Chain.getOperand(i).getNode() == Load.getNode())
         Ops.push_back(Load.getOperand(0));
@@ -371,9 +371,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
     Ops.clear();
     Ops.push_back(NewChain);
   }
-  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
-    Ops.push_back(CallSeqStart.getOperand(i));
-  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+    Ops.push_back(OrigChain.getOperand(i));
+  CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                              Load.getOperand(1), Load.getOperand(2));
   Ops.clear();
@@ -386,7 +386,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
 /// isCalleeLoad - Return true if call address is a load and it can be
 /// moved below CALLSEQ_START and the chains leading up to the call.
 /// Return the CALLSEQ_START by reference as a second output.
-static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
     return false;
   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -397,12 +399,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
     return false;
 
   // Now let's find the callseq_start.
-  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
     if (!Chain.hasOneUse())
       return false;
     Chain = Chain.getOperand(0);
   }
-  
+
+  if (!Chain.getNumOperands())
+    return false;
   if (Chain.getOperand(0).getNode() == Callee.getNode())
     return true;
   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@@ -420,7 +424,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
        E = CurDAG->allnodes_end(); I != E; ) {
     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 
-    if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
+    if (OptLevel != CodeGenOpt::None &&
+        (N->getOpcode() == X86ISD::CALL ||
+         N->getOpcode() == X86ISD::TC_RETURN)) {
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
       ///
@@ -440,11 +446,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       ///      \        /
       ///       \      /
       ///       [CALL]
+      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
       SDValue Chain = N->getOperand(0);
       SDValue Load  = N->getOperand(1);
-      if (!isCalleeLoad(Load, Chain))
+      if (!isCalleeLoad(Load, Chain, HasCallSeq))
         continue;
-      MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
+      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
       ++NumLoadMoved;
       continue;
     }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0cfcbb6..7d2140b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,7 +16,6 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
-#include "X86MCTargetExpr.h"
 #include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
 #include "llvm/CallingConv.h"
@@ -37,6 +36,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallSet.h"
@@ -45,10 +45,12 @@
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
+using namespace dwarf;
 
 STATISTIC(NumTailCalls, "Number of tail calls");
 
@@ -988,6 +990,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::SELECT);
   setTargetDAGCombine(ISD::SHL);
@@ -1118,8 +1121,8 @@ X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
          Subtarget->isPICStyleGOT());
   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
   // entries.
-  return X86MCTargetExpr::Create(MBB->getSymbol(Ctx),
-                                 X86MCTargetExpr::GOTOFF, Ctx);
+  return MCSymbolRefExpr::Create(MBB->getSymbol(),
+                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
 }
 
 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
@@ -1378,6 +1381,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
     return !Subtarget->is64Bit();
   case CallingConv::Fast:
     return GuaranteedTailCallOpt;
+  case CallingConv::GHC:
+    return GuaranteedTailCallOpt;
   }
 }
 
@@ -1385,7 +1390,9 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
 /// given CallingConvention value.
 CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
   if (Subtarget->is64Bit()) {
-    if (Subtarget->isTargetWin64())
+    if (CC == CallingConv::GHC)
+      return CC_X86_64_GHC;
+    else if (Subtarget->isTargetWin64())
       return CC_X86_Win64_C;
     else
       return CC_X86_64_C;
@@ -1395,6 +1402,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
     return CC_X86_32_FastCall;
   else if (CC == CallingConv::Fast)
     return CC_X86_32_FastCC;
+  else if (CC == CallingConv::GHC)
+    return CC_X86_32_GHC;
   else
     return CC_X86_32_C;
 }
@@ -1412,10 +1421,16 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        /*AlwaysInline=*/true, NULL, 0, NULL, 0);
 }
 
+/// IsTailCallConvention - Return true if the calling convention is one that
+/// supports tail call optimization (currently fastcc and the GHC convention).
+static bool IsTailCallConvention(CallingConv::ID CC) {
+  return (CC == CallingConv::Fast || CC == CallingConv::GHC);
+}
+
 /// FuncIsMadeTailCallSafe - Return true if the function is being made into
 /// a tailcall target by changing its ABI.
 static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
-  return GuaranteedTailCallOpt && CC == CallingConv::Fast;
+  return GuaranteedTailCallOpt && IsTailCallConvention(CC);
 }
 
 SDValue
@@ -1465,7 +1480,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
                                         DebugLoc dl,
                                         SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &InVals) {
-
   MachineFunction &MF = DAG.getMachineFunction();
   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
 
@@ -1479,8 +1493,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   bool Is64Bit = Subtarget->is64Bit();
   bool IsWin64 = Subtarget->isTargetWin64();
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1683,7 +1697,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
   } else {
     BytesToPopOnReturn  = 0; // Callee pops nothing.
     // If this is an sret function, the return should pop the hidden pointer.
-    if (!Is64Bit && CallConv != CallingConv::Fast && ArgsAreStructReturn(Ins))
+    if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
       BytesToPopOnReturn = 4;
   }
 
@@ -1767,7 +1781,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
 
   if (isTailCall) {
     // Check if it's really possible to do a tail call.
-    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                    Outs, Ins, DAG);
 
     // Sibcalls are automatically detected tailcalls which do not require
@@ -1779,8 +1794,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       ++NumTailCalls;
   }
 
-  assert(!(isVarArg && CallConv == CallingConv::Fast) &&
-         "Var args not supported with calling convention fastcc");
+  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
+         "Var args not supported with calling convention fastcc or ghc");
 
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -1794,7 +1809,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // This is a sibcall. The memory operands are available in caller's
     // own caller's stack.
     NumBytes = 0;
-  else if (GuaranteedTailCallOpt && CallConv == CallingConv::Fast)
+  else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
 
   int FPDiff = 0;
@@ -2074,18 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                          OpFlags);
   }
 
-  if (isTailCall && !WasGlobalOrExternal) {
-    // Force the address into a (call preserved) caller-saved register since
-    // tailcall must happen after callee-saved registers are poped.
-    // FIXME: Give it a special register class that contains caller-saved
-    // register instead?
-    unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX;
-    Chain = DAG.getCopyToReg(Chain,  dl,
-                             DAG.getRegister(TCReg, getPointerTy()),
-                             Callee,InFlag);
-    Callee = DAG.getRegister(TCReg, getPointerTy());
-  }
-
   // Returns a chain & a flag for retval copy to use.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   SmallVector<SDValue, 8> Ops;
@@ -2131,14 +2134,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
         if (RVLocs[i].isRegLoc())
           MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
     }
-
-    assert(((Callee.getOpcode() == ISD::Register &&
-               (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
-                cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
-              Callee.getOpcode() == ISD::TargetExternalSymbol ||
-              Callee.getOpcode() == ISD::TargetGlobalAddress) &&
-           "Expecting a global address, external symbol, or scratch register");
-
     return DAG.getNode(X86ISD::TC_RETURN, dl,
                        NodeTys, &Ops[0], Ops.size());
   }
@@ -2150,7 +2145,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   unsigned NumBytesForCalleeToPush;
   if (IsCalleePop(isVarArg, CallConv))
     NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
-  else if (!Is64Bit && CallConv != CallingConv::Fast && IsStructRet)
+  else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
     // If this is a call to a struct-return function, the callee
     // pops the hidden struct pointer, so we have to push it back.
     // This is common for Darwin/X86, Linux & Mingw32 targets.
@@ -2285,17 +2280,19 @@ bool
 X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                      CallingConv::ID CalleeCC,
                                                      bool isVarArg,
+                                                     bool isCalleeStructRet,
+                                                     bool isCallerStructRet,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                                      SelectionDAG& DAG) const {
-  if (CalleeCC != CallingConv::Fast &&
+  if (!IsTailCallConvention(CalleeCC) &&
       CalleeCC != CallingConv::C)
     return false;
 
   // If -tailcallopt is specified, make fastcc functions tail-callable.
   const Function *CallerF = DAG.getMachineFunction().getFunction();
   if (GuaranteedTailCallOpt) {
-    if (CalleeCC == CallingConv::Fast &&
+    if (IsTailCallConvention(CalleeCC) &&
         CallerF->getCallingConv() == CalleeCC)
       return true;
     return false;
@@ -2304,10 +2301,15 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   // Look for obvious safe cases to perform tail call optimization that does not
   // requite ABI changes. This is what gcc calls sibcall.
 
-  // Do not tail call optimize vararg calls for now.
+  // Do not sibcall optimize vararg calls for now.
   if (isVarArg)
     return false;
 
+  // Also avoid sibcall optimization if either caller or callee uses struct
+  // return semantics.
+  if (isCalleeStructRet || isCallerStructRet)
+    return false;
+
   // If the callee takes no arguments then go on to check the results of the
   // call.
   if (!Outs.empty()) {
@@ -6158,7 +6160,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
           N2C && N2C->isNullValue() &&
           RHSC && RHSC->isNullValue()) {
         SDValue CmpOp0 = Cmp.getOperand(0);
-        Cmp = DAG.getNode(X86ISD::CMP, dl, CmpOp0.getValueType(),
+        Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
                           CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
         return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
                            DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
@@ -8439,6 +8441,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::CMOV_V4F32:
   case X86::CMOV_V2F64:
   case X86::CMOV_V2I64:
+  case X86::CMOV_GR16:
+  case X86::CMOV_GR32:
+  case X86::CMOV_RFP32:
+  case X86::CMOV_RFP64:
+  case X86::CMOV_RFP80:
     return EmitLoweredSelect(MI, BB, EM);
 
   case X86::FP32_TO_INT16_IN_MEM:
@@ -8521,6 +8528,21 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
     return BB;
   }
+    // DBG_VALUE.  Only the frame index case is done here.
+  case X86::DBG_VALUE: {
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    DebugLoc DL = MI->getDebugLoc();
+    X86AddressMode AM;
+    MachineFunction *F = BB->getParent();
+    AM.BaseType = X86AddressMode::FrameIndexBase;
+    AM.Base.FrameIndex = MI->getOperand(0).getImm();
+    addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM).
+      addImm(MI->getOperand(1).getImm()).
+      addMetadata(MI->getOperand(2).getMetadata());
+    F->DeleteMachineInstr(MI);      // Remove pseudo.
+    return BB;
+  }
+
     // String/text processing lowering.
   case X86::PCMPISTRM128REG:
     return EmitPCMP(MI, BB, 3, false /* in-mem */);
@@ -8832,6 +8854,87 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts to a
+/// simple store and scalar loads to extract the elements.  N is one
+/// EXTRACT_VECTOR_ELT of the candidate vector.  The uses are rewritten in
+/// place, so an empty SDValue is returned whether or not the combine fired.
+static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
+                                                const TargetLowering &TLI) {
+  SDValue InputVector = N->getOperand(0);
+
+  // Only operate on vectors of 4 elements, where the alternative shuffling
+  // gets to be more expensive.
+  if (InputVector.getValueType() != MVT::v4i32)
+    return SDValue();
+
+  // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
+  // single use which is a sign-extend or zero-extend, and all elements are
+  // used.
+  SmallVector<SDNode *, 4> Uses;
+  unsigned ExtractedElements = 0;
+  for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
+       UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
+    if (UI.getUse().getResNo() != InputVector.getResNo())
+      return SDValue();
+
+    SDNode *Extract = *UI;
+    if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+
+    if (Extract->getValueType(0) != MVT::i32 || !Extract->hasOneUse())
+      return SDValue();
+    if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
+        Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+    if (!isa<ConstantSDNode>(Extract->getOperand(1)))
+      return SDValue();
+
+    // Record which element was extracted.
+    ExtractedElements |=
+      1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();
+
+    Uses.push_back(Extract);
+  }
+
+  // If not all the elements were used, this may not be worthwhile.
+  if (ExtractedElements != 15)
+    return SDValue();
+
+  // Ok, we've now decided to do the transformation.
+  DebugLoc dl = InputVector.getDebugLoc();
+
+  // Store the value to a temporary stack slot.
+  SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
+                            false, false, 0);
+
+  // Replace each use (extract) with a load of the appropriate element.
+  for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
+       UE = Uses.end(); UI != UE; ++UI) {
+    SDNode *Extract = *UI;
+    // Compute the element's address.  The ADD is done in the pointer type (the
+    // type of OffsetVal), not in the type of the extract index.
+    SDValue Idx = Extract->getOperand(1);
+    unsigned EltSize =
+        InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
+    uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+    SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
+    SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+                                     OffsetVal, StackPtr);
+
+    // Load the scalar.
+    SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
+                          NULL, 0, false, false, 0);
+
+    // Replace the extract with the load.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
+  }
+
+  // The replacement was made in place; don't return anything.
+  return SDValue();
+}
+
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget *Subtarget) {
@@ -9721,6 +9824,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
+  case ISD::EXTRACT_VECTOR_ELT:
+                        return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
   case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
@@ -9810,7 +9915,8 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
         AsmPieces[2] == "${0:w}" &&
         IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
       AsmPieces.clear();
-      SplitString(IA->getConstraintString().substr(5), AsmPieces, ",");
+      const std::string &Constraints = IA->getConstraintString();
+      SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
       std::sort(AsmPieces.begin(), AsmPieces.end());
       if (AsmPieces.size() == 4 &&
           AsmPieces[0] == "~{cc}" &&
@@ -10265,41 +10371,3 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 
   return Res;
 }
-
-//===----------------------------------------------------------------------===//
-//                           X86 Widen vector type
-//===----------------------------------------------------------------------===//
-
-/// getWidenVectorType: given a vector type, returns the type to widen
-/// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
-/// If there is no vector type that we want to widen to, returns MVT::Other
-/// When and where to widen is target dependent based on the cost of
-/// scalarizing vs using the wider vector type.
-
-EVT X86TargetLowering::getWidenVectorType(EVT VT) const {
-  assert(VT.isVector());
-  if (isTypeLegal(VT))
-    return VT;
-
-  // TODO: In computeRegisterProperty, we can compute the list of legal vector
-  //       type based on element type.  This would speed up our search (though
-  //       it may not be worth it since the size of the list is relatively
-  //       small).
-  EVT EltVT = VT.getVectorElementType();
-  unsigned NElts = VT.getVectorNumElements();
-
-  // On X86, it make sense to widen any vector wider than 1
-  if (NElts <= 1)
-    return MVT::Other;
-
-  for (unsigned nVT = MVT::FIRST_VECTOR_VALUETYPE;
-       nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
-    EVT SVT = (MVT::SimpleValueType)nVT;
-
-    if (isTypeLegal(SVT) &&
-        SVT.getVectorElementType() == EltVT &&
-        SVT.getVectorNumElements() > NElts)
-      return SVT;
-  }
-  return MVT::Other;
-}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4c12fcc..0f15eba 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -564,13 +564,6 @@ namespace llvm {
       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
     }
 
-    /// getWidenVectorType: given a vector type, returns the type to widen
-    /// to (e.g., v7i8 to v8i8). If the vector type is legal, it returns itself.
-    /// If there is no vector type that we want to widen to, returns EVT::Other
-    /// When and were to widen is target dependent based on the cost of
-    /// scalarizing vs using the wider vector type.
-    virtual EVT getWidenVectorType(EVT VT) const;
-
     /// createFastISel - This method returns a target specific FastISel object,
     /// or null if the target does not support "fast" ISel.
     virtual FastISel *
@@ -637,6 +630,8 @@ namespace llvm {
     bool IsEligibleForTailCallOptimization(SDValue Callee,
                                            CallingConv::ID CalleeCC,
                                            bool isVarArg,
+                                           bool isCalleeStructRet,
+                                           bool isCallerStructRet,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                            SelectionDAG& DAG) const;
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 8e684c9..4262c0ac 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -24,6 +24,7 @@ def i64i32imm  : Operand<i64>;
 // pc relative.
 def i64i32imm_pcrel : Operand<i64> {
   let PrintMethod = "print_pcrel_imm";
+  let ParserMatchClass = X86AbsMemAsmOperand;
 }
 
 
@@ -32,17 +33,26 @@ def i64i8imm   : Operand<i64> {
   let ParserMatchClass = ImmSExt8AsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea64mem : Operand<i64> {
   let PrintMethod = "printlea64mem";
   let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
-  let ParserMatchClass = X86MemAsmOperand;
+  let ParserMatchClass = X86NoSegMemAsmOperand;
 }
 
 def lea64_32mem : Operand<i32> {
   let PrintMethod = "printlea64_32mem";
   let AsmOperandLowerMethod = "lower_lea64_32mem";
   let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
-  let ParserMatchClass = X86MemAsmOperand;
+  let ParserMatchClass = X86NoSegMemAsmOperand;
 }
 
 //===----------------------------------------------------------------------===//
@@ -176,22 +186,31 @@ let isCall = 1 in
 
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
-                                         variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
-                                         variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops),
-                   "jmp{q}\t{*}$dst  # TAILCALL",
-                   []>;     
+  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
+  def TCRETURNdi64 : I<0, Pseudo, (outs),
+                         (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
+                                           variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi64 : I<0, Pseudo, (outs), 
+                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+
+  def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
+                                      (ins i64i32imm_pcrel:$dst, variable_ops),
+                   "jmp\t$dst  # TAILCALL", []>;
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+
+  def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst  # TAILCALL", []>;
+}
 
 // Branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -339,6 +358,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)]>;
 
+/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let neverHasSideEffects = 1 in
+def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV64rm_TC : RI<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}",
+                []>;
+
+let mayStore = 1 in
+def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
+                "mov{q}\t{$src, $dst|$dst, $src}",
+                []>;
+
 def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
                       "mov{q}\t{$src, %rax|%rax, $src}", []>;
 def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -463,8 +498,8 @@ let neverHasSideEffects = 1 in {
 
 let Defs = [EFLAGS] in {
 
-def ADD64i32 : RI<0x05, RawFrm, (outs), (ins i32imm:$src),
-                  "add{q}\t{$src, %rax|%rax, $src}", []>;
+def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src),
+                     "add{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
 let isConvertibleToThreeAddress = 1 in {
@@ -520,8 +555,8 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
 
 let Uses = [EFLAGS] in {
 
-def ADC64i32 : RI<0x15, RawFrm, (outs), (ins i32imm:$src),
-                  "adc{q}\t{$src, %rax|%rax, $src}", []>;
+def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src),
+                     "adc{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
@@ -594,8 +629,8 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
                        (implicit EFLAGS)]>;
 } // isTwoAddress
 
-def SUB64i32 : RI<0x2D, RawFrm, (outs), (ins i32imm:$src),
-                  "sub{q}\t{$src, %rax|%rax, $src}", []>;
+def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+                     "sub{q}\t{$src, %rax|%rax, $src}", []>;
 
 // Memory-Register Subtraction
 def SUB64mr  : RI<0x29, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
@@ -641,8 +676,8 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
                       [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
 } // isTwoAddress
 
-def SBB64i32 : RI<0x1D, RawFrm, (outs), (ins i32imm:$src),
-                  "sbb{q}\t{$src, %rax|%rax, $src}", []>;
+def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+                     "sbb{q}\t{$src, %rax|%rax, $src}", []>;
 
 def SBB64mr  : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), 
                   "sbb{q}\t{$src2, $dst|$dst, $src2}",
@@ -1047,8 +1082,8 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
                 [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
 
 let Defs = [EFLAGS] in {
-def AND64i32 : RI<0x25, RawFrm, (outs), (ins i32imm:$src),
-                  "and{q}\t{$src, %rax|%rax, $src}", []>;
+def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src),
+                     "and{q}\t{$src, %rax|%rax, $src}", []>;
 
 let isTwoAddress = 1 in {
 let isCommutable = 1 in
@@ -1187,8 +1222,8 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
 
 // Integer comparison
 let Defs = [EFLAGS] in {
-def TEST64i32 : RI<0xa9, RawFrm, (outs), (ins i32imm:$src),
-                   "test{q}\t{$src, %rax|%rax, $src}", []>;
+def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
+                      "test{q}\t{$src, %rax|%rax, $src}", []>;
 let isCommutable = 1 in
 def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                   "test{q}\t{$src2, $src1|$src1, $src2}",
@@ -1210,8 +1245,8 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
                  (implicit EFLAGS)]>;
 
 
-def CMP64i32 : RI<0x3D, RawFrm, (outs), (ins i32imm:$src),
-                  "cmp{q}\t{$src, %rax|%rax, $src}", []>;
+def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+                     "cmp{q}\t{$src, %rax|%rax, $src}", []>;
 def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
                  "cmp{q}\t{$src2, $src1|$src1, $src2}",
                  [(X86cmp GR64:$src1, GR64:$src2),
@@ -1884,14 +1919,21 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
           (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
 
 // tailcall stuff
-def : Pat<(X86tcret GR64:$dst, imm:$off),
-          (TCRETURNri64 GR64:$dst, imm:$off)>;
+def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
+          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+	  Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi64 addr:$dst, imm:$off)>,
+	  Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+	  Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+	  Requires<[In64BitMode]>;
 
 // Comparisons.
 
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index ae24bfb..b730918 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -350,20 +350,27 @@ def FBLDm    : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">;
 def FBSTPm   : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">;
 
 // Floating point cmovs.
+class FpIf32CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf32, HasCMov]>;
+class FpIf64CMov<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
+  FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64, HasCMov]>;
+
 multiclass FPCMov<PatLeaf cc> {
-  def _Fp32  : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
+  def _Fp32  : FpIf32CMov<(outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2),
                        CondMovFP,
                      [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2,
                                         cc, EFLAGS))]>;
-  def _Fp64  : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
+  def _Fp64  : FpIf64CMov<(outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2),
                        CondMovFP,
                      [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2,
                                         cc, EFLAGS))]>;
   def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
                      CondMovFP,
                      [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2,
-                                        cc, EFLAGS))]>;
+                                        cc, EFLAGS))]>,
+                                        Requires<[HasCMov]>;
 }
+
 let Uses = [EFLAGS], isTwoAddress = 1 in {
 defm CMOVB  : FPCMov<X86_COND_B>;
 defm CMOVBE : FPCMov<X86_COND_BE>;
@@ -375,6 +382,7 @@ defm CMOVNE : FPCMov<X86_COND_NE>;
 defm CMOVNP : FPCMov<X86_COND_NP>;
 }
 
+let Predicates = [HasCMov] in {
 // These are not factored because there's no clean way to pass DA/DB.
 def CMOVB_F  : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
                   "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
@@ -392,6 +400,7 @@ def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
                   "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
 def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
                   "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+} // Predicates = [HasCMov]
 
 // Floating point loads & stores.
 let canFoldAsLoad = 1 in {
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 4fd91bb..139a905 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -266,6 +266,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
+    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
     { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -301,6 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::SETPr,       X86::SETPm, 0, 0 },
     { X86::SETSr,       X86::SETSm, 0, 0 },
     { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
+    { X86::TAILJMPr64,  X86::TAILJMPm64, 1, 0 },
     { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
     { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
     { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
@@ -376,6 +378,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
+    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -675,6 +678,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
   case X86::MOV16rr:
   case X86::MOV32rr: 
   case X86::MOV64rr:
+  case X86::MOV32rr_TC: 
+  case X86::MOV64rr_TC:
 
   // FP Stack register class copies
   case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
@@ -1901,6 +1906,10 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       Opc = X86::MOV16rr;
     } else if (CommonRC == &X86::GR8_NOREXRegClass) {
       Opc = X86::MOV8rr;
+    } else if (CommonRC == &X86::GR64_TCRegClass) {
+      Opc = X86::MOV64rr_TC;
+    } else if (CommonRC == &X86::GR32_TCRegClass) {
+      Opc = X86::MOV32rr_TC;
     } else if (CommonRC == &X86::RFP32RegClass) {
       Opc = X86::MOV_Fp3232;
     } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
@@ -2038,6 +2047,10 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
     Opc = X86::MOV16mr;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64mr_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32mr_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::ST_FpP80m;   // pops
   } else if (RC == &X86::RFP64RegClass) {
@@ -2131,6 +2144,10 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
     Opc = X86::MOV16rm;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64rm_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32rm_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::LD_Fp80m;
   } else if (RC == &X86::RFP64RegClass) {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 071c5aa..1225b68 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -21,6 +21,7 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3,
                                   [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                                    SDTCisInt<0>, SDTCisInt<3>]>;
 
+// FIXME: Should be modelled as returning i32
 def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 
 def SDTX86Cmov    : SDTypeProfile<1, 4,
@@ -83,7 +84,6 @@ def X86shld    : SDNode<"X86ISD::SHLD",     SDTIntShiftDOp>;
 def X86shrd    : SDNode<"X86ISD::SHRD",     SDTIntShiftDOp>;
 
 def X86cmp     : SDNode<"X86ISD::CMP" ,     SDTX86CmpTest>;
-
 def X86bt      : SDNode<"X86ISD::BT",       SDTX86CmpTest>;
 
 def X86cmov    : SDNode<"X86ISD::CMOV",     SDTX86Cmov>;
@@ -234,6 +234,15 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved registers are popped.
+def i32mem_TC : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea32mem : Operand<i32> {
   let PrintMethod = "printlea32mem";
   let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
@@ -288,6 +297,8 @@ def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
+def HasCMov      : Predicate<"Subtarget->hasCMov()">;
+def NoCMov       : Predicate<"!Subtarget->hasCMov()">;
 def HasMMX       : Predicate<"Subtarget->hasMMX()">;
 def HasSSE1      : Predicate<"Subtarget->hasSSE1()">;
 def HasSSE2      : Predicate<"Subtarget->hasSSE2()">;
@@ -696,30 +707,33 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
 // Tail call stuff.
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNdi : I<0, Pseudo, (outs), 
-                   (ins i32imm:$dst, i32imm:$offset, variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-def TCRETURNri : I<0, Pseudo, (outs), 
-                   (ins GR32:$dst, i32imm:$offset, variable_ops),
-                 "#TC_RETURN $dst $offset",
-                 []>;
-
-// FIXME: The should be pseudo instructions that are lowered when going to
-// mcinst.
-let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+  def TCRETURNdi : I<0, Pseudo, (outs), 
+                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                   "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri : I<0, Pseudo, (outs), 
+                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi : I<0, Pseudo, (outs), 
+                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+
+  // FIXME: These should be pseudo instructions that are lowered when going to
+  // mcinst.
+  def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
+                           (ins i32imm_pcrel:$dst, variable_ops),
                  "jmp\t$dst  # TAILCALL",
                  []>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops), 
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), 
                    "jmp{l}\t{*}$dst  # TAILCALL",
                  []>;     
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops),
-                   "jmp\t{*}$dst  # TAILCALL", []>;
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+                   "jmp{l}\t{*}$dst  # TAILCALL", []>;
+}
 
 //===----------------------------------------------------------------------===//
 //  Miscellaneous Instructions...
@@ -1032,6 +1046,22 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
 
+/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let neverHasSideEffects = 1 in
+def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}",
+                []>;
+
+let mayStore = 1 in
+def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
+                "mov{l}\t{$src, $dst|$dst, $src}",
+                []>;
+
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
 // encoded when a REX prefix is present.
@@ -1185,19 +1215,7 @@ let isTwoAddress = 1 in {
 // Conditional moves
 let Uses = [EFLAGS] in {
 
-// X86 doesn't have 8-bit conditional moves. Use a customInserter to
-// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
-// however that requires promoting the operands, and can induce additional
-// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
-// clobber EFLAGS, because if one of the operands is zero, the expansion
-// could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in
-def CMOV_GR8 : I<0, Pseudo,
-                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
-                 "#CMOV_GR8 PSEUDO!",
-                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
-                                          imm:$cond, EFLAGS))]>;
-
+let Predicates = [HasCMov] in {
 let isCommutable = 1 in {
 def CMOVB16rr : I<0x42, MRMSrcReg,       // if <u, GR16 = GR16
                   (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
@@ -1585,6 +1603,49 @@ def CMOVNO32rm : I<0x41, MRMSrcMem,       // if !overflow, GR32 = [mem32]
                    [(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
                                     X86_COND_NO, EFLAGS))]>,
                   TB;
+} // Predicates = [HasCMov]
+
+// X86 doesn't have 8-bit conditional moves. Use a customInserter to
+// emit control flow. An alternative to this is to mark i8 SELECT as Promote,
+// however that requires promoting the operands, and can induce additional
+// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
+// clobber EFLAGS, because if one of the operands is zero, the expansion
+// could involve an xor.
+let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+def CMOV_GR8 : I<0, Pseudo,
+                 (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
+                 "#CMOV_GR8 PSEUDO!",
+                 [(set GR8:$dst, (X86cmov GR8:$src1, GR8:$src2,
+                                          imm:$cond, EFLAGS))]>;
+
+let Predicates = [NoCMov] in {
+def CMOV_GR32 : I<0, Pseudo,
+                    (outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cond),
+                    "#CMOV_GR32* PSEUDO!",
+                    [(set GR32:$dst,
+                      (X86cmov GR32:$src1, GR32:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_GR16 : I<0, Pseudo,
+                    (outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cond),
+                    "#CMOV_GR16* PSEUDO!",
+                    [(set GR16:$dst,
+                      (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
+def CMOV_RFP32 : I<0, Pseudo,
+                    (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+                    "#CMOV_RFP32 PSEUDO!",
+                    [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+def CMOV_RFP64 : I<0, Pseudo,
+                    (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+                    "#CMOV_RFP64 PSEUDO!",
+                    [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+def CMOV_RFP80 : I<0, Pseudo,
+                    (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+                    "#CMOV_RFP80 PSEUDO!",
+                    [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+                                                  EFLAGS))]>;
+} // Predicates = [NoCMov]
+} // usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS]
 } // Uses = [EFLAGS]
 
 
@@ -4294,14 +4355,21 @@ def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
 
 // Calls
 // tailcall stuff
-def : Pat<(X86tcret GR32:$dst, imm:$off),
-          (TCRETURNri GR32:$dst, imm:$off)>;
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+          (TCRETURNri GR32_TC:$dst, imm:$off)>,
+	  Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi addr:$dst, imm:$off)>,
+	  Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
+	  Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+	  Requires<[In32BitMode]>;
 
 // Normal calls, with various flavors of addresses.
 def : Pat<(X86call (i32 tglobaladdr:$dst)),
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index eea0eb8..e1203e2 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -272,9 +272,9 @@ defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
 
 // Shift up / down and insert zero's.
 def : Pat<(v1i64 (X86vshl     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSLLQri VR64:$src, imm:$amt)>;
+          (MMX_PSLLQri VR64:$src, (GetLo32XForm imm:$amt))>;
 def : Pat<(v1i64 (X86vshr     VR64:$src, (i8 imm:$amt))),
-          (MMX_PSRLQri VR64:$src, imm:$amt)>;
+          (MMX_PSRLQri VR64:$src, (GetLo32XForm imm:$amt))>;
 
 // Comparison Instructions
 defm MMX_PCMPEQB : MMXI_binop_rm_int<0x74, "pcmpeqb", int_x86_mmx_pcmpeq_b>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index bd6e1b8..18f9e52 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -379,7 +379,7 @@ let Constraints = "$src1 = $dst" in
 def MOVSSrr : SSI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
                   "movss\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
+                  [(set (v4f32 VR128:$dst),
                         (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
 
 // Extract the low 32-bit value from one vector and insert it into another.
@@ -1141,7 +1141,7 @@ let Constraints = "$src1 = $dst" in
 def MOVSDrr : SDI<0x10, MRMSrcReg,
                   (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
                   "movsd\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
+                  [(set (v2f64 VR128:$dst),
                         (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
 
 // Extract the low 64-bit value from one vector and insert it into another.
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index d297d24..6f0a8d9 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Function.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/System/Valgrind.h"
 #include <cstdlib>
 #include <cstring>
 using namespace llvm;
@@ -37,6 +38,10 @@ void X86JITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
   unsigned NewAddr = (intptr_t)New;
   unsigned OldAddr = (intptr_t)OldWord;
   *OldWord = NewAddr - OldAddr - 4; // Emit PC-relative addr of New code.
+
+  // X86 doesn't need to invalidate the processor cache, so just invalidate
+  // Valgrind's cache directly.
+  sys::ValgrindDiscardTranslations(Old, 5);
 }
 
 
@@ -393,8 +398,10 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
       *(intptr_t *)(RetAddr - 0xa) = NewVal;
       ((unsigned char*)RetAddr)[0] = (2 | (4 << 3) | (3 << 6));
     }
+    sys::ValgrindDiscardTranslations((void*)(RetAddr-0xc), 0xd);
 #else
     ((unsigned char*)RetAddr)[-1] = 0xE9;
+    sys::ValgrindDiscardTranslations((void*)(RetAddr-1), 5);
 #endif
   }
 
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 9498810..1afabc9 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -70,7 +70,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
   ExceptionsType = ExceptionHandling::Dwarf;
 }
 
-X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
+X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   AsmTransCBE = x86_asm_table;
   AssemblerDialect = AsmWriterFlavor;
 
@@ -89,6 +89,11 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
 
   // Exceptions handling
   ExceptionsType = ExceptionHandling::Dwarf;
+  
+  // OpenBSD has buggy support for .quad in 32-bit mode, so just split into two
+  // .words.
+  if (T.getOS() == Triple::OpenBSD && T.getArch() == Triple::x86)
+    Data64bitsDirective = 0;
 }
 
 MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
diff --git a/lib/Target/X86/X86MCTargetExpr.cpp b/lib/Target/X86/X86MCTargetExpr.cpp
deleted file mode 100644
index 17b4fe8..0000000
--- a/lib/Target/X86/X86MCTargetExpr.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//===- X86MCTargetExpr.cpp - X86 Target Specific MCExpr Implementation ----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86MCTargetExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-X86MCTargetExpr *X86MCTargetExpr::Create(const MCSymbol *Sym, VariantKind K,
-                                         MCContext &Ctx) {
-  return new (Ctx) X86MCTargetExpr(Sym, K);
-}
-
-void X86MCTargetExpr::PrintImpl(raw_ostream &OS) const {
-  OS << *Sym;
-  
-  switch (Kind) {
-  case Invalid:   OS << "@<invalid>"; break;
-  case GOT:       OS << "@GOT"; break;
-  case GOTOFF:    OS << "@GOTOFF"; break;
-  case GOTPCREL:  OS << "@GOTPCREL"; break;
-  case GOTTPOFF:  OS << "@GOTTPOFF"; break;
-  case INDNTPOFF: OS << "@INDNTPOFF"; break;
-  case NTPOFF:    OS << "@NTPOFF"; break;
-  case PLT:       OS << "@PLT"; break;
-  case TLSGD:     OS << "@TLSGD"; break;
-  case TPOFF:     OS << "@TPOFF"; break;
-  }
-}
-
-bool X86MCTargetExpr::EvaluateAsRelocatableImpl(MCValue &Res) const {
-  // FIXME: I don't know if this is right, it followed MCSymbolRefExpr.
-  
-  // Evaluate recursively if this is a variable.
-  if (Sym->isVariable())
-    return Sym->getValue()->EvaluateAsRelocatable(Res);
-  
-  Res = MCValue::get(Sym, 0, 0);
-  return true;
-}
diff --git a/lib/Target/X86/X86MCTargetExpr.h b/lib/Target/X86/X86MCTargetExpr.h
deleted file mode 100644
index 7de8a5c..0000000
--- a/lib/Target/X86/X86MCTargetExpr.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- X86MCTargetExpr.h - X86 Target Specific MCExpr -----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86_MCTARGETEXPR_H
-#define X86_MCTARGETEXPR_H
-
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-
-/// X86MCTargetExpr - This class represents symbol variants, like foo@GOT.
-class X86MCTargetExpr : public MCTargetExpr {
-public:
-  enum VariantKind {
-    Invalid,
-    GOT,
-    GOTOFF,
-    GOTPCREL,
-    GOTTPOFF,
-    INDNTPOFF,
-    NTPOFF,
-    PLT,
-    TLSGD,
-    TPOFF
-  };
-private:
-  /// Sym - The symbol being referenced.
-  const MCSymbol * const Sym;
-  /// Kind - The modifier.
-  const VariantKind Kind;
-  
-  X86MCTargetExpr(const MCSymbol *S, VariantKind K) : Sym(S), Kind(K) {}
-public:
-  static X86MCTargetExpr *Create(const MCSymbol *Sym, VariantKind K,
-                                 MCContext &Ctx);
-  
-  void PrintImpl(raw_ostream &OS) const;
-  bool EvaluateAsRelocatableImpl(MCValue &Res) const;
-};
-  
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index cdb579c..3238cce 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -294,13 +294,20 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
 const unsigned *
 X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   bool callsEHReturn = false;
+  bool ghcCall = false;
 
   if (MF) {
     const MachineFrameInfo *MFI = MF->getFrameInfo();
     const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
     callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
+    const Function *F = MF->getFunction();
+    ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
   }
 
+  static const unsigned GhcCalleeSavedRegs[] = {
+    0
+  };
+
   static const unsigned CalleeSavedRegs32Bit[] = {
     X86::ESI, X86::EDI, X86::EBX, X86::EBP,  0
   };
@@ -326,7 +333,9 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     X86::XMM14, X86::XMM15, 0
   };
 
-  if (Is64Bit) {
+  if (ghcCall) {
+    return GhcCalleeSavedRegs;
+  } else if (Is64Bit) {
     if (IsWin64)
       return CalleeSavedRegsWin64;
     else
@@ -788,7 +797,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
 }
 
 void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
-                                                unsigned LabelId,
+                                                MCSymbol *Label,
                                                 unsigned FramePtr) const {
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
@@ -851,7 +860,7 @@ void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
 
     MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
     MachineLocation CSSrc(Reg);
-    Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
+    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
   }
 }
 
@@ -929,10 +938,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   std::vector<MachineMove> &Moves = MMI->getFrameMoves();
   const TargetData *TD = MF.getTarget().getTargetData();
   uint64_t NumBytes = 0;
-  int stackGrowth =
-    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
-     TargetFrameInfo::StackGrowsUp ?
-       TD->getPointerSize() : -TD->getPointerSize());
+  int stackGrowth = -TD->getPointerSize();
 
   if (HasFP) {
     // Calculate required stack adjustment.
@@ -953,26 +959,25 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
 
     if (needsFrameMoves) {
       // Mark the place where EBP/RBP was saved.
-      unsigned FrameLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
 
       // Define the current CFA rule to use the provided offset.
       if (StackSize) {
         MachineLocation SPDst(MachineLocation::VirtualFP);
         MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
-        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
       } else {
         // FIXME: Verify & implement for FP
         MachineLocation SPDst(StackPtr);
         MachineLocation SPSrc(StackPtr, stackGrowth);
-        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
       }
 
       // Change the rule for the FramePtr to be an "offset" rule.
-      MachineLocation FPDst(MachineLocation::VirtualFP,
-                            2 * stackGrowth);
+      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
       MachineLocation FPSrc(FramePtr);
-      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
     }
 
     // Update EBP with the new base value...
@@ -982,13 +987,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
 
     if (needsFrameMoves) {
       // Mark effective beginning of when frame pointer becomes valid.
-      unsigned FrameLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
 
       // Define the current CFA to use the EBP/RBP register.
       MachineLocation FPDst(FramePtr);
       MachineLocation FPSrc(MachineLocation::VirtualFP);
-      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
+      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
     }
 
     // Mark the FramePtr as live-in in every block except the entry.
@@ -1022,15 +1027,15 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
 
     if (!HasFP && needsFrameMoves) {
       // Mark callee-saved push instruction.
-      unsigned LabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+      MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
 
       // Define the current CFA rule to use the provided offset.
       unsigned Ptr = StackSize ?
         MachineLocation::VirtualFP : StackPtr;
       MachineLocation SPDst(Ptr);
       MachineLocation SPSrc(Ptr, StackOffset);
-      Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
       StackOffset += stackGrowth;
     }
   }
@@ -1094,8 +1099,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
 
   if ((NumBytes || PushedRegs) && needsFrameMoves) {
     // Mark end of stack pointer adjustment.
-    unsigned LabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);
+    MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
 
     if (!HasFP && NumBytes) {
       // Define the current CFA rule to use the provided offset.
@@ -1103,18 +1108,18 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
         MachineLocation SPDst(MachineLocation::VirtualFP);
         MachineLocation SPSrc(MachineLocation::VirtualFP,
                               -StackSize + stackGrowth);
-        Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
       } else {
         // FIXME: Verify & implement for FP
         MachineLocation SPDst(StackPtr);
         MachineLocation SPSrc(StackPtr, stackGrowth);
-        Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
+        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
       }
     }
 
     // Emit DWARF info specifying the offsets of the callee-saved registers.
     if (PushedRegs)
-      emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr);
+      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
   }
 }
 
@@ -1133,13 +1138,12 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
   case X86::RETI:
   case X86::TCRETURNdi:
   case X86::TCRETURNri:
-  case X86::TCRETURNri64:
+  case X86::TCRETURNmi:
   case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
   case X86::EH_RETURN:
   case X86::EH_RETURN64:
-  case X86::TAILJMPd:
-  case X86::TAILJMPr:
-  case X86::TAILJMPm:
     break;  // These are ok
   }
 
@@ -1224,11 +1228,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
             TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             StackPtr).addReg(DestAddr.getReg());
   } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
-             RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+             RetOpcode == X86::TCRETURNmi ||
+             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+             RetOpcode == X86::TCRETURNmi64) {
+    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
     // Tail call return: adjust the stack pointer and jump to callee.
     MBBI = prior(MBB.end());
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    MachineOperand &StackAdjust = MBBI->getOperand(1);
+    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
     assert(StackAdjust.isImm() && "Expecting immediate value.");
 
     // Adjust stack pointer.
@@ -1248,10 +1255,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
     }
 
     // Jump to label or value in register.
-    if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
         addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                          JumpTarget.getTargetFlags());
+    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+                                       ? X86::TAILJMPm : X86::TAILJMPm64));
+      for (unsigned i = 0; i != 5; ++i)
+        MIB.addOperand(MBBI->getOperand(i));
     } else if (RetOpcode == X86::TCRETURNri64) {
       BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
     } else {
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 12b2f3e..ac96c4c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -149,7 +149,7 @@ public:
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
 
-  void emitCalleeSavedFrameMoves(MachineFunction &MF, unsigned LabelId,
+  void emitCalleeSavedFrameMoves(MachineFunction &MF, MCSymbol *Label,
                                  unsigned FramePtr) const;
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index ed2ce6c..76b8f7a 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -535,6 +535,13 @@ def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
 def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
   let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
 }
+def GR32_TC   : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+  let SubRegClassList = [GR8, GR8, GR16];
+}
+def GR64_TC   : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
+                                                 R8, R9, R11]> {
+  let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+}
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index adef5bc..f907614 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -315,9 +315,14 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
 
   // If requesting codegen for X86-64, make sure that 64-bit features
   // are enabled.
-  if (Is64Bit)
+  if (Is64Bit) {
     HasX86_64 = true;
 
+    // All 64-bit cpus have cmov support.
+    HasCMov = true;
+  }
+    
+
   DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
                << ", 3DNowLevel " << X863DNowLevel
                << ", 64bit " << HasX86_64 << "\n");
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 594a470..50338d3 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -133,6 +133,7 @@ public:
   PICStyles::Style getPICStyle() const { return PICStyle; }
   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
 
+  bool hasCMov() const { return HasCMov; }
   bool hasMMX() const { return X86SSELevel >= MMX; }
   bool hasSSE1() const { return X86SSELevel >= SSE1; }
   bool hasSSE2() const { return X86SSELevel >= SSE2; }
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index c80ae19..c15dfbb 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -7,11 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86MCTargetExpr.h"
 #include "X86TargetObjectFile.h"
 #include "X86TargetMachine.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/Dwarf.h"
@@ -19,28 +20,22 @@ using namespace llvm;
 using namespace dwarf;
 
 const MCExpr *X8664_MachoTargetObjectFile::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                                 MachineModuleInfo *MMI, 
-                                 unsigned Encoding) const {
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI, unsigned Encoding,
+                               MCStreamer &Streamer) const {
 
   // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
   // is an indirect pc-relative reference.
   if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
-    SmallString<128> Name;
-    Mang->getNameWithPrefix(Name, GV, false);
-    const MCSymbol *Sym;
-    if (GV->hasPrivateLinkage())
-      Sym = getContext().GetOrCreateTemporarySymbol(Name);
-    else
-      Sym = getContext().GetOrCreateSymbol(Name);
+    const MCSymbol *Sym = Mang->getSymbol(GV);
     const MCExpr *Res =
-      X86MCTargetExpr::Create(Sym, X86MCTargetExpr::GOTPCREL, getContext());
+      MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
     const MCExpr *Four = MCConstantExpr::Create(4, getContext());
     return MCBinaryExpr::CreateAdd(Res, Four, getContext());
   }
 
   return TargetLoweringObjectFileMachO::
-    getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
 }
 
 unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 0444417..f2fd49c 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -17,14 +17,14 @@
 namespace llvm {
   class X86TargetMachine;
 
-  /// X8664_MachoTargetObjectFile - This TLOF implementation is used for
-  /// Darwin/x86-64.
+  /// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
+  /// x86-64.
   class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
   public:
-
     virtual const MCExpr *
-    getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
-                              MachineModuleInfo *MMI, unsigned Encoding) const;
+    getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                   MachineModuleInfo *MMI, unsigned Encoding,
+                                   MCStreamer &Streamer) const;
   };
 
   class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
diff --git a/lib/Target/XCore/AsmPrinter/Makefile b/lib/Target/XCore/AsmPrinter/Makefile
index 82dc1df..581f736 100644
--- a/lib/Target/XCore/AsmPrinter/Makefile
+++ b/lib/Target/XCore/AsmPrinter/Makefile
@@ -11,6 +11,6 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMXCoreAsmPrinter
 
 # Hack: we need to include 'main' XCore target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index 82e23a1..c882338 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -53,9 +54,8 @@ namespace {
     const XCoreSubtarget &Subtarget;
   public:
     explicit XCoreAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
-                             MCContext &Ctx, MCStreamer &Streamer,
-                             const MCAsmInfo *T)
-      : AsmPrinter(O, TM, Ctx, Streamer, T),
+                             MCStreamer &Streamer)
+      : AsmPrinter(O, TM, Streamer),
       Subtarget(TM.getSubtarget<XCoreSubtarget>()) {}
 
     virtual const char *getPassName() const {
@@ -129,7 +129,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GV, Mang,TM));
 
   
-  MCSymbol *GVSym = GetGlobalValueSymbol(GV);
+  MCSymbol *GVSym = Mang->getSymbol(GV);
   Constant *C = GV->getInitializer();
   unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
   
@@ -276,7 +276,7 @@ printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive)
     MachineBasicBlock *MBB = JTBBs[i];
     if (i > 0)
       O << ",";
-    O << *MBB->getSymbol(OutContext);
+    O << *MBB->getSymbol();
   }
 }
 
@@ -290,10 +290,10 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     O << MO.getImm();
     break;
   case MachineOperand::MO_MachineBasicBlock:
-    O << *MO.getMBB()->getSymbol(OutContext);
+    O << *MO.getMBB()->getSymbol();
     break;
   case MachineOperand::MO_GlobalAddress:
-    O << *GetGlobalValueSymbol(MO.getGlobal());
+    O << *Mang->getSymbol(MO.getGlobal());
     break;
   case MachineOperand::MO_ExternalSymbol:
     O << MO.getSymbolName();
diff --git a/lib/Target/XCore/README.txt b/lib/Target/XCore/README.txt
index deaeb0f..b69205b 100644
--- a/lib/Target/XCore/README.txt
+++ b/lib/Target/XCore/README.txt
@@ -5,4 +5,3 @@ To-do
 * Tailcalls
 * Investigate loop alignment
 * Add builtins
-* Make better use of lmul / macc
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 29a6ab7..1615547 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -173,29 +173,6 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
         }
         break;
       }
-      case ISD::SMUL_LOHI: {
-        // FIXME fold addition into the macc instruction
-        SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
-                                CurDAG->getTargetConstant(0, MVT::i32)), 0);
-        SDValue Ops[] = { Zero, Zero, N->getOperand(0), N->getOperand(1) };
-        SDNode *ResNode = CurDAG->getMachineNode(XCore::MACCS_l4r, dl,
-                                                 MVT::i32, MVT::i32, Ops, 4);
-        ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
-        ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
-        return NULL;
-      }
-      case ISD::UMUL_LOHI: {
-        // FIXME fold addition into the macc / lmul instruction
-        SDValue Zero(CurDAG->getMachineNode(XCore::LDC_ru6, dl, MVT::i32,
-                                  CurDAG->getTargetConstant(0, MVT::i32)), 0);
-        SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
-                            Zero, Zero };
-        SDNode *ResNode = CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32,
-                                                 MVT::i32, Ops, 4);
-        ReplaceUses(SDValue(N, 0), SDValue(ResNode, 1));
-        ReplaceUses(SDValue(N, 1), SDValue(ResNode, 0));
-        return NULL;
-      }
       case XCoreISD::LADD: {
         SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                             N->getOperand(2) };
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 8249219..bf1a457 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -324,6 +324,10 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
   return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, Res);
 }
 
+unsigned XCoreTargetLowering::getJumpTableEncoding() const {
+  return MachineJumpTableInfo::EK_Inline;  // Same encoding for every table.
+}
+
 SDValue XCoreTargetLowering::
 LowerBR_JT(SDValue Op, SelectionDAG &DAG)
 {
@@ -1341,12 +1345,41 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     }
   }
   break;
+  case XCoreISD::LMUL: {
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+    EVT VT = N0.getValueType();
+    // Canonicalize multiplicative constant to RHS. If both multiplicative
+    // operands are constant canonicalize smallest to RHS.
+    if ((N0C && !N1C) ||
+        (N0C && N1C && N0C->getZExtValue() < N1C->getZExtValue()))
+      return DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(VT, VT), N1, N0, N2, N3);
+
+    // lmul(x, 0, a, b)
+    if (N1C && N1C->isNullValue()) {
+      // If the high result is unused fold to add(a, b)
+      if (N->hasNUsesOfValue(0, 0)) {
+        SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
+        SDValue Ops [] = { Lo, Lo };
+        return DAG.getMergeValues(Ops, 2, dl);
+      }
+      // Otherwise fold to ladd(a, b, 0)
+      return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
+    }
+  }
+  break;
   case ISD::ADD: {
-    // Fold expressions such as add(add(mul(x,y),a),b) -> lmul(x, y, a, b).
+    // Fold 32 bit expressions such as add(add(mul(x,y),a),b) ->
+    // lmul(x, y, a, b). The high result of lmul will be ignored.
     // This is only profitable if the intermediate results are unused
     // elsewhere.
     SDValue Mul0, Mul1, Addend0, Addend1;
-    if (isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
+    if (N->getValueType(0) == MVT::i32 &&
+        isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
       SDValue Zero = DAG.getConstant(0, MVT::i32);
       SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
                                     DAG.getVTList(MVT::i32, MVT::i32), Mul0,
@@ -1354,6 +1387,31 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue Result(Ignored.getNode(), 1);
       return Result;
     }
+    APInt HighMask = APInt::getHighBitsSet(64, 32);
+    // Fold 64 bit expression such as add(add(mul(x,y),a),b) ->
+    // lmul(x, y, a, b) if all operands are zero-extended. We do this
+    // before type legalization as it is messy to match the operands after
+    // that.
+    if (N->getValueType(0) == MVT::i64 &&
+        isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, false) &&
+        DAG.MaskedValueIsZero(Mul0, HighMask) &&
+        DAG.MaskedValueIsZero(Mul1, HighMask) &&
+        DAG.MaskedValueIsZero(Addend0, HighMask) &&
+        DAG.MaskedValueIsZero(Addend1, HighMask)) {
+      SDValue Mul0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  Mul0, DAG.getConstant(0, MVT::i32));
+      SDValue Mul1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                  Mul1, DAG.getConstant(0, MVT::i32));
+      SDValue Addend0L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                     Addend0, DAG.getConstant(0, MVT::i32));
+      SDValue Addend1L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
+                                     Addend1, DAG.getConstant(0, MVT::i32));
+      SDValue Hi = DAG.getNode(XCoreISD::LMUL, dl,
+                               DAG.getVTList(MVT::i32, MVT::i32), Mul0L, Mul1L,
+                               Addend0L, Addend1L);
+      SDValue Lo(Hi.getNode(), 1);
+      return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+    }
   }
   break;
   case ISD::STORE: {
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index f597780..3ccdeec 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -80,6 +80,8 @@ namespace llvm {
 
     explicit XCoreTargetLowering(XCoreTargetMachine &TM);
 
+    virtual unsigned getJumpTableEncoding() const;
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
 
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 722e747..e5f5a6d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -429,10 +429,9 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     storeRegToStackSlot(MBB, MI, it->getReg(), true,
                         it->getFrameIdx(), it->getRegClass());
     if (emitFrameMoves) {
-      unsigned SaveLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addImm(SaveLabelId);
-      XFI->getSpillLabels().push_back(
-          std::pair<unsigned, CalleeSavedInfo>(SaveLabelId, *it));
+      MCSymbol *SaveLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
+      XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
     }
   }
   return true;
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 124a011..a575a0f 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -31,7 +31,7 @@ private:
   int LRSpillSlot;
   int FPSpillSlot;
   int VarArgsFrameIndex;
-  std::vector<std::pair<unsigned, CalleeSavedInfo> > SpillLabels;
+  std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > SpillLabels;
 
 public:
   XCoreFunctionInfo() :
@@ -60,7 +60,7 @@ public:
   void setFPSpillSlot(int off) { FPSpillSlot = off; }
   int getFPSpillSlot() const { return FPSpillSlot; }
   
-  std::vector<std::pair<unsigned, CalleeSavedInfo> >&getSpillLabels() {
+  std::vector<std::pair<MCSymbol*, CalleeSavedInfo> > &getSpillLabels() {
     return SpillLabels;
   }
 };
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 0ab312e..8892504 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -456,17 +456,17 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
       std::vector<MachineMove> &Moves = MMI->getFrameMoves();
       
       // Show update of SP.
-      unsigned FrameLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
       
       MachineLocation SPDst(MachineLocation::VirtualFP);
       MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
-      Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+      Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
       
       if (LRSavedOnEntry) {
         MachineLocation CSDst(MachineLocation::VirtualFP, 0);
         MachineLocation CSSrc(XCore::LR);
-        Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+        Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
       }
     }
     if (saveLR) {
@@ -475,12 +475,11 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
       MBB.addLiveIn(XCore::LR);
       
       if (emitFrameMoves) {
-        unsigned SaveLRLabelId = MMI->NextLabelID();
-        BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveLRLabelId);
+        MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
+        BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel);
         MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
         MachineLocation CSSrc(XCore::LR);
-        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabelId,
-                                                   CSDst, CSSrc));
+        MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
       }
     }
   }
@@ -492,12 +491,11 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
     // R10 is live-in. It is killed at the spill.
     MBB.addLiveIn(XCore::R10);
     if (emitFrameMoves) {
-      unsigned SaveR10LabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(SaveR10LabelId);
+      MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label);
       MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
       MachineLocation CSSrc(XCore::R10);
-      MMI->getFrameMoves().push_back(MachineMove(SaveR10LabelId,
-                                                 CSDst, CSSrc));
+      MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
     }
     // Set the FP from the SP.
     unsigned FramePtr = XCore::R10;
@@ -505,21 +503,21 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
       .addImm(0);
     if (emitFrameMoves) {
       // Show FP is now valid.
-      unsigned FrameLabelId = MMI->NextLabelID();
-      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addImm(FrameLabelId);
+      MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
+      BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
       MachineLocation SPDst(FramePtr);
       MachineLocation SPSrc(MachineLocation::VirtualFP);
-      MMI->getFrameMoves().push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
+      MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
     }
   }
   
   if (emitFrameMoves) {
     // Frame moves for callee saved.
     std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-    std::vector<std::pair<unsigned, CalleeSavedInfo> >&SpillLabels =
+    std::vector<std::pair<MCSymbol*, CalleeSavedInfo> >&SpillLabels =
         XFI->getSpillLabels();
     for (unsigned I = 0, E = SpillLabels.size(); I != E; ++I) {
-      unsigned SpillLabel = SpillLabels[I].first;
+      MCSymbol *SpillLabel = SpillLabels[I].first;
       CalleeSavedInfo &CSI = SpillLabels[I].second;
       int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
       unsigned Reg = CSI.getReg();
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0582210..bdb46eb 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -751,120 +751,41 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   return true;
 }
 
+namespace {
+class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+  InstCombiner *IC;  // Used by replaceCall to rewrite the call's uses.
+protected:
+  void replaceCall(Value *With) {
+    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
+  }
+  /// Fold only when the constant object size is all-ones or covers the access.
+  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp));
+    if (!SizeCI) return false;
+    if (SizeCI->isAllOnesValue()) return true;
+    if (isString) {  // GetStringLength returns 0 if the length is unknown.
+      uint64_t Len = GetStringLength(CI->getOperand(SizeArgOp));
+      return Len != 0 && SizeCI->getZExtValue() >= Len;
+    }
+    ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp));
+    return Arg && SizeCI->getZExtValue() >= Arg->getZExtValue();
+  }
+public:
+  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
+  Instruction *NewInstruction;  // Set by replaceCall; null if nothing folded.
+};
+} // end anonymous namespace
+
 // Try to fold some different type of calls here.
 // Currently we're only working with the checking functions, memcpy_chk, 
 // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
 // strcat_chk and strncat_chk.
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
   if (CI->getCalledFunction() == 0) return 0;
-  
-  StringRef Name = CI->getCalledFunction()->getName();
-  BasicBlock *BB = CI->getParent();
-  IRBuilder<> B(CI->getParent()->getContext());
-  
-  // Set the builder to the instruction after the call.
-  B.SetInsertPoint(BB, CI);
-
-  if (Name == "__memcpy_chk") {
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
-      return 0;
-    ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeArg)
-      return 0;
-    if (SizeCI->isAllOnesValue() ||
-        SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
-      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
-                 1, B, TD);
-      return ReplaceInstUsesWith(*CI, CI->getOperand(1));
-    }
-    return 0;
-  }
-
-  // Should be similar to memcpy.
-  if (Name == "__mempcpy_chk") {
-    return 0;
-  }
-
-  if (Name == "__memmove_chk") {
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
-      return 0;
-    ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeArg)
-      return 0;
-    if (SizeCI->isAllOnesValue() ||
-        SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
-      EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
-                  1, B, TD);
-      return ReplaceInstUsesWith(*CI, CI->getOperand(1));
-    }
-    return 0;
-  }
-
-  if (Name == "__memset_chk") {
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
-      return 0;
-    ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeArg)
-      return 0;
-    if (SizeCI->isAllOnesValue() ||
-        SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
-      Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
-                                   false);
-      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
-      return ReplaceInstUsesWith(*CI, CI->getOperand(1));
-    }
-    return 0;
-  }
-
-  if (Name == "__strcpy_chk") {
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeCI)
-      return 0;
-    // If a) we don't have any length information, or b) we know this will
-    // fit then just lower to a plain strcpy. Otherwise we'll keep our
-    // strcpy_chk call which may fail at runtime if the size is too long.
-    // TODO: It might be nice to get a maximum length out of the possible
-    // string lengths for varying.
-    if (SizeCI->isAllOnesValue() ||
-      SizeCI->getZExtValue() >= GetStringLength(CI->getOperand(2))) {
-      Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
-      return ReplaceInstUsesWith(*CI, Ret);
-    }
-    return 0;
-  }
-
-  // Should be similar to strcpy.
-  if (Name == "__stpcpy_chk") {
-    return 0;
-  }
 
-  if (Name == "__strncpy_chk") {
-    ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(4));
-    if (!SizeCI)
-      return 0;
-    ConstantInt *SizeArg = dyn_cast<ConstantInt>(CI->getOperand(3));
-    if (!SizeArg)
-      return 0;
-    if (SizeCI->isAllOnesValue() ||
-        SizeCI->getZExtValue() <= SizeArg->getZExtValue()) {
-      Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD);
-      return ReplaceInstUsesWith(*CI, Ret);
-    }
-    return 0; 
-  }
-
-  if (Name == "__strcat_chk") {
-    return 0;
-  }
-
-  if (Name == "__strncat_chk") {
-    return 0;
-  }
-
-  return 0;
+  InstCombineFortifiedLibCalls Simplifier(this);
+  Simplifier.fold(CI, TD);
+  return Simplifier.NewInstruction;
 }
 
 // visitCallSite - Improvements for call and invoke instructions.
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 7ceda1f..50c9630 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Assembly/Writer.h"
@@ -36,6 +37,7 @@
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/IRBuilder.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -72,6 +74,7 @@ namespace {
                             DenseMap<Value*,Value*> &SunkAddrs);
     bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                DenseMap<Value*,Value*> &SunkAddrs);
+    bool OptimizeCallInst(CallInst *CI);
     bool MoveExtToFormExtLoad(Instruction *I);
     bool OptimizeExtUses(Instruction *I);
     void findLoopBackEdges(const Function &F);
@@ -537,6 +540,47 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
   return MadeChange;
 }
 
+namespace {
+class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
+protected:
+  void replaceCall(Value *With) {
+    CI->replaceAllUsesWith(With);
+    CI->eraseFromParent();  // The call is deleted once its uses are rewired.
+  }
+  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+      return SizeCI->isAllOnesValue();  // Only an all-ones size is folded.
+    return false;
+  }
+};
+} // end anonymous namespace
+
+bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
+  // Replace llvm.objectsize.* with a constant: 0 if operand 2 is 1, else -1.
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
+  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
+    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+    const Type *ReturnTy = CI->getType();
+    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);    
+    CI->replaceAllUsesWith(RetVal);
+    CI->eraseFromParent();
+    return true;  // CI has been erased at this point.
+  }
+
+  // From here on out we're working with named functions.
+  if (CI->getCalledFunction() == 0) return false;
+  
+  // We'll need TargetData from here on out.
+  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
+  if (!TD) return false;
+  
+  // Lower all default uses of _chk calls.  This is very similar
+  // to what InstCombineCalls does, but here we are only lowering calls
+  // that have the default "don't know" as the objectsize.  Anything else
+  // should be left alone.
+  CodeGenPrepareFortifiedLibCalls Simplifier;
+  return Simplifier.fold(CI, TD);
+}
 //===----------------------------------------------------------------------===//
 // Memory Optimization
 //===----------------------------------------------------------------------===//
@@ -913,6 +957,10 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
         } else
           // Sink address computing for memory operands into the block.
           MadeChange |= OptimizeInlineAsmInst(I, &(*CI), SunkAddrs);
+      } else {
+        // Other CallInst optimizations that don't need to muck with the
+        // enclosing iterator here.
+        MadeChange |= OptimizeCallInst(CI);
       }
     }
   }
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index cb563c3..de93e9f 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -43,6 +43,7 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/Dominators.h"
@@ -215,7 +216,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 void IndVarSimplify::RewriteLoopExitValues(Loop *L,
                                            SCEVExpander &Rewriter) {
   // Verify the input to the pass in already in LCSSA form.
-  assert(L->isLCSSAForm());
+  assert(L->isLCSSAForm(*DT));
 
   SmallVector<BasicBlock*, 8> ExitBlocks;
   L->getUniqueExitBlocks(ExitBlocks);
@@ -445,7 +446,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // Clean up dead instructions.
   Changed |= DeleteDeadPHIs(L->getHeader());
   // Check a post-condition.
-  assert(L->isLCSSAForm() && "Indvars did not leave the loop in lcssa form!");
+  assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
   return Changed;
 }
 
@@ -556,6 +557,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
     // dominates the exit block.
     if (I->mayHaveSideEffects() || I->mayReadFromMemory())
       continue;
+    // Skip debug info intrinsics.
+    if (isa<DbgInfoIntrinsic>(I))
+      continue;
     // Don't sink static AllocaInsts out of the entry block, which would
     // turn them into dynamic allocas!
     if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a355ec3..4ad41ae 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -86,7 +86,6 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) {
 }
 
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
-  assert(L->isLCSSAForm());
   LoopInfo *LI = &getAnalysis<LoopInfo>();
 
   BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 071e9b7..e3b809e 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -206,7 +206,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
   Function *F = currentLoop->getHeader()->getParent();
   bool Changed = false;
   do {
-    assert(currentLoop->isLCSSAForm());
+    assert(currentLoop->isLCSSAForm(*DT));
     redoLoop = false;
     Changed |= processCurrentLoop();
   } while(redoLoop);
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index b54565c..cb03423 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -14,6 +14,8 @@
 
 #include "llvm-c/Transforms/Scalar.h"
 #include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
@@ -90,6 +92,11 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createScalarReplAggregatesPass());
 }
 
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+                                                  int Threshold) {
+  unwrap(PM)->add(createScalarReplAggregatesPass(Threshold));
+}
+
 void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createSimplifyLibCallsPass());
 }
@@ -105,3 +112,7 @@ void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM) {
 void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
   unwrap(PM)->add(createDemoteRegisterToMemoryPass());
 }
+
+void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createVerifierPass());
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 62f34a2..738c5e8 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Attributes.h"
 #include "llvm/Support/CFG.h"
@@ -210,12 +211,16 @@ static bool MergeEmptyReturnBlocks(Function &F) {
       // Check for something else in the block.
       BasicBlock::iterator I = Ret;
       --I;
-      if (!isa<PHINode>(I) || I != BB.begin() ||
-          Ret->getNumOperands() == 0 ||
-          Ret->getOperand(0) != I)
+      // Skip over debug info.
+      while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+        --I;
+      if (!isa<DbgInfoIntrinsic>(I) &&
+          (!isa<PHINode>(I) || I != BB.begin() ||
+           Ret->getNumOperands() == 0 ||
+           Ret->getOperand(0) != I))
         continue;
     }
-    
+
     // If this is the first returning block, remember it and keep going.
     if (RetBlock == 0) {
       RetBlock = &BB;
@@ -239,7 +244,7 @@ static bool MergeEmptyReturnBlocks(Function &F) {
     // If the canonical return block has no PHI node, create one now.
     PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
     if (RetBlockPHI == 0) {
-      Value *InVal = cast<ReturnInst>(RetBlock->begin())->getOperand(0);
+      Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
       RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge",
                                     &RetBlock->front());
       
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 2ea4bb6..b44f019 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -72,21 +72,41 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
 /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
 /// specified pointer arguments.
 Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
-                        const TargetData *TD) {
+                        const TargetData *TD, StringRef Name) {
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
   AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
   const Type *I8Ptr = B.getInt8PtrTy();
-  Value *StrCpy = M->getOrInsertFunction("strcpy", AttrListPtr::get(AWI, 2),
+  Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI, 2),
                                          I8Ptr, I8Ptr, I8Ptr, NULL);
   CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
-                               "strcpy");
+                               Name);
   if (const Function *F = dyn_cast<Function>(StrCpy->stripPointerCasts()))
     CI->setCallingConv(F->getCallingConv());
   return CI;
 }
 
+/// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+/// specified pointer and length arguments.
+Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
+                         IRBuilder<> &B, const TargetData *TD) {
+  Module *M = B.GetInsertBlock()->getParent()->getParent();
+  AttributeWithIndex AWI[2];
+  AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
+  AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+  const Type *I8Ptr = B.getInt8PtrTy();
+  Value *StrNCpy = M->getOrInsertFunction("strncpy", AttrListPtr::get(AWI, 2),
+                                         I8Ptr, I8Ptr, I8Ptr,
+                                         Len->getType(), NULL);
+  CallInst *CI = B.CreateCall3(StrNCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
+                               Len, "strncpy");
+  if (const Function *F = dyn_cast<Function>(StrNCpy->stripPointerCasts()))
+    CI->setCallingConv(F->getCallingConv());
+  return CI;
+}
+
+
 /// EmitMemCpy - Emit a call to the memcpy function to the builder.  This always
 /// expects that the size has type 'intptr_t' and Dst/Src are pointers.
 Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
@@ -322,3 +342,86 @@ void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
   if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
     CI->setCallingConv(Fn->getCallingConv());
 }
+
+SimplifyFortifiedLibCalls::~SimplifyFortifiedLibCalls() { }
+
+bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
+  this->CI = CI;
+  StringRef Name = CI->getCalledFunction()->getName();
+  BasicBlock *BB = CI->getParent();
+  IRBuilder<> B(CI->getParent()->getContext());
+
+  // Set the builder to insert new instructions immediately before the call.
+  B.SetInsertPoint(BB, CI);
+
+  if (Name == "__memcpy_chk") {
+    if (isFoldable(4, 3, false)) {
+      EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+                 1, B, TD);
+      replaceCall(CI->getOperand(1));
+      return true;
+    }
+    return false;
+  }
+
+  // Should be similar to memcpy.
+  if (Name == "__mempcpy_chk") {
+    return false;
+  }
+
+  if (Name == "__memmove_chk") {
+    if (isFoldable(4, 3, false)) {
+      EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+                  1, B, TD);
+      replaceCall(CI->getOperand(1));
+      return true;
+    }
+    return false;
+  }
+
+  if (Name == "__memset_chk") {
+    if (isFoldable(4, 3, false)) {
+      Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
+                                   false);
+      EmitMemSet(CI->getOperand(1), Val,  CI->getOperand(3), B, TD);
+      replaceCall(CI->getOperand(1));
+      return true;
+    }
+    return false;
+  }
+
+  if (Name == "__strcpy_chk" || Name == "__stpcpy_chk") {
+    // If a) we don't have any length information, or b) we know this will
+    // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+    // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+    // TODO: It might be nice to derive a maximum length from the possible
+    // string lengths when the source string varies.
+    if (isFoldable(3, 2, true)) {
+      Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD,
+                              Name.substr(2, 6));
+      replaceCall(Ret);
+      return true;
+    }
+    return false;
+  }
+
+  if (Name == "__strncpy_chk") {
+    if (isFoldable(4, 3, false)) {
+      Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
+                               CI->getOperand(3), B, TD);
+      replaceCall(Ret);
+      return true;
+    }
+    return false;
+  }
+
+  if (Name == "__strcat_chk") {
+    return false;
+  }
+
+  if (Name == "__strncat_chk") {
+    return false;
+  }
+
+  return false;
+}
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 590d667..df6e603 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -88,7 +88,7 @@ namespace {
     /// verifyAnalysis() - Verify loop nest.
     virtual void verifyAnalysis() const {
       // Check the special guarantees that LCSSA makes.
-      assert(L->isLCSSAForm() && "LCSSA form not preserved!");
+      assert(L->isLCSSAForm(*DT) && "LCSSA form not preserved!");
     }
 
     /// inLoop - returns true if the given block is within the current loop
@@ -164,7 +164,7 @@ bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
     }
   }
   
-  assert(L->isLCSSAForm());
+  assert(L->isLCSSAForm(*DT));
   PredCache.clear();
 
   return MadeChange;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 584ec14..1ef3c32 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Function.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Type.h"
@@ -290,6 +291,9 @@ ReprocessLoop:
       bool AllInvariant = true;
       for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) {
         Instruction *Inst = I++;
+        // Skip debug info intrinsics.
+        if (isa<DbgInfoIntrinsic>(Inst))
+          continue;
         if (Inst == CI)
           continue;
         if (!L->makeLoopInvariant(Inst, Changed,
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e47c86d..ac59b4d 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -105,8 +105,6 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
 /// If a LoopPassManager is passed in, and the loop is fully removed, it will be
 /// removed from the LoopPassManager as well. LPM can also be NULL.
 bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
-  assert(L->isLCSSAForm());
-
   BasicBlock *Preheader = L->getLoopPreheader();
   if (!Preheader) {
     DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
@@ -370,9 +368,5 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
   if (CompletelyUnroll && LPM != NULL)
     LPM->deleteLoopFromQueue(L);
 
-  // If we didn't completely unroll the loop, it should still be in LCSSA form.
-  if (!CompletelyUnroll)
-    assert(L->isLCSSAForm());
-
   return true;
 }
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f343c38..2ce5bdc 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1826,7 +1826,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
         // switch.
         if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
           if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred))
-            return SimplifyCFG(BB) || 1;
+            return SimplifyCFG(BB) | true;
 
         // This block must be empty, except for the setcond inst, if it exists.
         // Ignore dbg intrinsics.
@@ -1860,7 +1860,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
       // branches to us and one of our successors, fold the setcc into the
       // predecessor and use logical operations to pick the right destination.
       if (FoldBranchToCommonDest(BI))
-        return SimplifyCFG(BB) | 1;
+        return SimplifyCFG(BB) | true;
 
 
       // Scan predecessor blocks for conditional branches.
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index d8f015a..c37d5b0 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -24,8 +24,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Constants.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Metadata.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 9887f28..9978f40 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -105,6 +105,11 @@ public:
   StringMap<MDString*> MDStringCache;
   
   FoldingSet<MDNode> MDNodeSet;
+  // MDNodes may be uniqued or not uniqued.  When they're not uniqued, they
+  // aren't in the MDNodeSet, but they're still shared between objects, so no
+  // one object can destroy them.  This set allows us to at least destroy them
+  // on Context destruction.
+  SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
   
   ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
 
@@ -235,17 +240,21 @@ public:
       (*I)->AbstractTypeUsers.clear();
       delete *I;
     }
-    // Destroy MDNode operands first.
+    // Destroy MDNodes.  ~MDNode can move and remove nodes between the MDNodeSet
+    // and the NonUniquedMDNodes sets, so copy the values out first.
+    SmallVector<MDNode*, 8> MDNodes;
+    MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
     for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
-         I != E;) {
-      MDNode *N = &(*I);
-      ++I;
-      N->replaceAllOperandsWithNull();
+         I != E; ++I) {
+      MDNodes.push_back(&*I);
     }
-    while (!MDNodeSet.empty()) {
-      MDNode *N = &(*MDNodeSet.begin());
-      N->destroy();
+    MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+    for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(),
+           E = MDNodes.end(); I != E; ++I) {
+      (*I)->destroy();
     }
+    assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+           "Destroying all MDNodes didn't empty the Context's sets.");
     // Destroy MDStrings.
     for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
            E = MDStringCache.end(); I != E; ++I) {
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 379aeb5..06d4fd4 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -101,8 +101,10 @@ MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
 MDNode::~MDNode() {
   assert((getSubclassDataFromValue() & DestroyFlag) != 0 &&
          "Not being destroyed through destroy()?");
-  if (!isNotUniqued()) {
-    LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+  LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+  if (isNotUniqued()) {
+    pImpl->NonUniquedMDNodes.erase(this);
+  } else {
     pImpl->MDNodeSet.RemoveNode(this);
   }
 
@@ -248,12 +250,10 @@ void MDNode::Profile(FoldingSetNodeID &ID) const {
     ID.AddPointer(getOperand(i));
 }
 
-// replaceAllOperandsWithNull - This is used while destroying llvm context to 
-// gracefully delete all nodes. This method replaces all operands with null.
-void MDNode::replaceAllOperandsWithNull() {
-  for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
-       Op != E; ++Op)
-    replaceOperand(Op, 0);
+void MDNode::setIsNotUniqued() {
+  setValueSubclassData(getSubclassDataFromValue() | NotUniquedBit);
+  LLVMContextImpl *pImpl = getType()->getContext().pImpl;
+  pImpl->NonUniquedMDNodes.insert(this);
 }
 
 // Replace value from this node's operand list.
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 9b2c2ca..2a0cfa8 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -87,6 +87,11 @@ void Type::destroy() const {
     pImpl->OpaqueTypes.erase(opaque_this);
   }
 
+  if (ForwardType && ForwardType->isAbstract()) {
+    ForwardType->dropRef();
+    ForwardType = NULL;
+  }
+
   // For all the other type subclasses, there is either no contained types or 
   // just one (all Sequentials). For Sequentials, the PATypeHandle is not
   // allocated past the type object, its included directly in the SequentialType
@@ -254,10 +259,12 @@ const Type *Type::getForwardedTypeInternal() const {
   // Yes, it is forwarded again.  First thing, add the reference to the new
   // forward type.
   if (RealForwardedType->isAbstract())
-    cast<DerivedType>(RealForwardedType)->addRef();
+    RealForwardedType->addRef();
 
   // Now drop the old reference.  This could cause ForwardType to get deleted.
-  cast<DerivedType>(ForwardType)->dropRef();
+  // ForwardType must be abstract because only abstract types can have their own
+  // ForwardTypes.
+  ForwardType->dropRef();
 
   // Return the updated type.
   ForwardType = RealForwardedType;
@@ -1142,8 +1149,8 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
   // Any PATypeHolders referring to this type will now automatically forward to
   // the type we are resolved to.
   ForwardType = NewType;
-  if (NewType->isAbstract())
-    cast<DerivedType>(NewType)->addRef();
+  if (ForwardType->isAbstract())
+    ForwardType->addRef();
 
   // Add a self use of the current type so that we don't delete ourself until
   // after the function exits.
author	rdivacky <rdivacky@FreeBSD.org>	2010-03-16 16:51:38 +0000
committer	rdivacky <rdivacky@FreeBSD.org>	2010-03-16 16:51:38 +0000
commit	0f448b841684305c051796982f300c9bff959307 (patch)
tree	458dd25677a43aef6390ecadb4423817f00e08b0 /lib
parent	9e2446b38c94db61b2416c28fee415c03663c11c (diff)
download	FreeBSD-src-0f448b841684305c051796982f300c9bff959307.zip FreeBSD-src-0f448b841684305c051796982f300c9bff959307.tar.gz