81 files changed, 1822 insertions, 1410 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 761fbc6..ca1f4a3 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,4 +1,4 @@
-//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===//
+//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -77,18 +77,18 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2)
 {
   assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!");
   assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!");
-  
+
   // find group for each register
   unsigned Group1 = GetGroup(Reg1);
   unsigned Group2 = GetGroup(Reg2);
-  
+
   // if either group is 0, then that must become the parent
   unsigned Parent = (Group1 == 0) ? Group1 : Group2;
   unsigned Other = (Parent == Group1) ? Group2 : Group1;
   GroupNodes.at(Other) = Parent;
   return Parent;
 }
-  
+
 unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg)
 {
   // Create a new GroupNode for Reg. Reg's existing GroupNode must
@@ -111,7 +111,7 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
 
 AggressiveAntiDepBreaker::
 AggressiveAntiDepBreaker(MachineFunction& MFi,
-                         TargetSubtarget::RegClassVector& CriticalPathRCs) : 
+                         TargetSubtarget::RegClassVector& CriticalPathRCs) :
   AntiDepBreaker(), MF(MFi),
   MRI(MF.getRegInfo()),
   TRI(MF.getTarget().getRegisterInfo()),
@@ -126,9 +126,9 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
     else
       CriticalPathSet |= CPSet;
    }
- 
+
   DEBUG(dbgs() << "AntiDep Critical-Path Registers:");
-  DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; 
+  DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
              r = CriticalPathSet.find_next(r))
           dbgs() << " " << TRI->getName(r));
   DEBUG(dbgs() << '\n');
@@ -232,10 +232,11 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
     // schedule region).
     if (State->IsLive(Reg)) {
       DEBUG(if (State->GetGroup(Reg) != 0)
-              dbgs() << " " << TRI->getName(Reg) << "=g" << 
+              dbgs() << " " << TRI->getName(Reg) << "=g" <<
                 State->GetGroup(Reg) << "->g0(region live-out)");
       State->UnionGroups(Reg, 0);
-    } else if ((DefIndices[Reg] < InsertPosIndex) && (DefIndices[Reg] >= Count)) {
+    } else if ((DefIndices[Reg] < InsertPosIndex)
+               && (DefIndices[Reg] >= Count)) {
       DefIndices[Reg] = Count;
     }
   }
@@ -266,7 +267,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (!MO.isReg()) continue;
-    if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || 
+    if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) ||
         IsImplicitDefUse(MI, MO)) {
       const unsigned Reg = MO.getReg();
       PassthruRegs.insert(Reg);
@@ -320,11 +321,12 @@ static SUnit *CriticalPathStep(SUnit *SU) {
 }
 
 void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
-                                             const char *tag, const char *header,
+                                             const char *tag,
+                                             const char *header,
                                              const char *footer) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   if (!State->IsLive(Reg)) {
@@ -355,10 +357,12 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
   DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
 }
 
-void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count,
-                                              std::set<unsigned>& PassthruRegs) {
+void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
+                                                  unsigned Count,
+                                              std::set<unsigned>& PassthruRegs)
+{
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   // Handle dead defs by simulating a last-use of the register just
@@ -371,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     if (!MO.isReg() || !MO.isDef()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
-    
+
     HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
   }
 
@@ -382,7 +386,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
 
-    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); 
+    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg));
 
     // If MI's defs have a special allocation requirement, don't allow
     // any def registers to be changed. Also assume all registers
@@ -398,11 +402,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
       unsigned AliasReg = *Alias;
       if (State->IsLive(AliasReg)) {
         State->UnionGroups(Reg, AliasReg);
-        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << 
+        DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " <<
               TRI->getName(AliasReg) << ")");
       }
     }
-    
+
     // Note register reference...
     const TargetRegisterClass *RC = NULL;
     if (i < MI->getDesc().getNumOperands())
@@ -438,7 +442,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou
 void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
                                            unsigned Count) {
   DEBUG(dbgs() << "\tUse Groups:");
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   // Scan the register uses for this instruction and update
@@ -448,9 +452,9 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
     if (!MO.isReg() || !MO.isUse()) continue;
     unsigned Reg = MO.getReg();
     if (Reg == 0) continue;
-    
-    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << 
-          State->GetGroup(Reg)); 
+
+    DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" <<
+          State->GetGroup(Reg));
 
     // It wasn't previously live but now it is, this is a kill. Forget
     // the previous live-range information and start a new live-range
@@ -472,7 +476,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
     AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
     RegRefs.insert(std::make_pair(Reg, RR));
   }
-  
+
   DEBUG(dbgs() << '\n');
 
   // Form a group of all defs and uses of a KILL instruction to ensure
@@ -486,7 +490,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
       if (!MO.isReg()) continue;
       unsigned Reg = MO.getReg();
       if (Reg == 0) continue;
-      
+
       if (FirstReg != 0) {
         DEBUG(dbgs() << "=" << TRI->getName(Reg));
         State->UnionGroups(FirstReg, Reg);
@@ -495,7 +499,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
         FirstReg = Reg;
       }
     }
-  
+
     DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n');
   }
 }
@@ -507,13 +511,14 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
   // Check all references that need rewriting for Reg. For each, use
   // the corresponding register class to narrow the set of registers
   // that are appropriate for renaming.
-  std::pair<std::multimap<unsigned, 
+  std::pair<std::multimap<unsigned,
                      AggressiveAntiDepState::RegisterReference>::iterator,
             std::multimap<unsigned,
                      AggressiveAntiDepState::RegisterReference>::iterator>
     Range = State->GetRegRefs().equal_range(Reg);
-  for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
-         Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+  for (std::multimap<unsigned,
+       AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
+       QE = Range.second; Q != QE; ++Q) {
     const TargetRegisterClass *RC = Q->second.RC;
     if (RC == NULL) continue;
 
@@ -527,9 +532,9 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
 
     DEBUG(dbgs() << " " << RC->getName());
   }
-  
+
   return BV;
-}  
+}
 
 bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
                                 unsigned AntiDepGroupIndex,
@@ -537,7 +542,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
                                 std::map<unsigned, unsigned> &RenameMap) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   // Collect all referenced registers in the same group as
@@ -552,7 +557,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   // Find the "superest" register in the group. At the same time,
   // collect the BitVector of registers that can be used to rename
   // each register.
-  DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex << ":\n");
+  DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex
+        << ":\n");
   std::map<unsigned, BitVector> RenameRegisterMap;
   unsigned SuperReg = 0;
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
@@ -563,7 +569,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     // If Reg has any references, then collect possible rename regs
     if (RegRefs.count(Reg) > 0) {
       DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":");
-    
+
       BitVector BV = GetRenameRegisters(Reg);
       RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV));
 
@@ -590,7 +596,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     static int renamecnt = 0;
     if (renamecnt++ % DebugDiv != DebugMod)
       return false;
-    
+
     dbgs() << "*** Performing rename " << TRI->getName(SuperReg) <<
       " for debug ***\n";
   }
@@ -600,9 +606,9 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   // order. If that register is available, and the corresponding
   // registers are available for the other group subregisters, then we
   // can use those registers to rename.
-  const TargetRegisterClass *SuperRC = 
+  const TargetRegisterClass *SuperRC =
     TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
-  
+
   const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
   const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
   if (RB == RE) {
@@ -624,7 +630,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
     const unsigned NewSuperReg = *R;
     // Don't replace a register with itself.
     if (NewSuperReg == SuperReg) continue;
-    
+
     DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':');
     RenameMap.clear();
 
@@ -643,7 +649,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
       }
 
       DEBUG(dbgs() << " " << TRI->getName(NewReg));
-      
+
       // Check if Reg can be renamed to NewReg.
       BitVector BV = RenameRegisterMap[Reg];
       if (!BV.test(NewReg)) {
@@ -663,7 +669,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
         for (const unsigned *Alias = TRI->getAliasSet(NewReg);
              *Alias; ++Alias) {
           unsigned AliasReg = *Alias;
-          if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) {
+          if (State->IsLive(AliasReg) ||
+              (KillIndices[Reg] > DefIndices[AliasReg])) {
             DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)");
             found = true;
             break;
@@ -672,11 +679,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
         if (found)
           goto next_super_reg;
       }
-      
+
       // Record that 'Reg' can be renamed to 'NewReg'.
       RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg));
     }
-    
+
     // If we fall-out here, then every register in the group can be
     // renamed, as recorded in RenameMap.
     RenameOrder.erase(SuperRC);
@@ -704,13 +711,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
                               unsigned InsertPosIndex) {
   unsigned *KillIndices = State->GetKillIndices();
   unsigned *DefIndices = State->GetDefIndices();
-  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& 
+  std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
     RegRefs = State->GetRegRefs();
 
   // The code below assumes that there is at least one instruction,
   // so just duck out immediately if the block is empty.
   if (SUnits.empty()) return 0;
-  
+
   // For each regclass the next register to use for renaming.
   RenameOrderType RenameOrder;
 
@@ -729,17 +736,17 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
   if (CriticalPathSet.any()) {
     for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
       SUnit *SU = &SUnits[i];
-      if (!CriticalPathSU || 
-          ((SU->getDepth() + SU->Latency) > 
+      if (!CriticalPathSU ||
+          ((SU->getDepth() + SU->Latency) >
            (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
         CriticalPathSU = SU;
       }
     }
-    
+
     CriticalPathMI = CriticalPathSU->getInstr();
   }
 
-#ifndef NDEBUG 
+#ifndef NDEBUG
   DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
   DEBUG(dbgs() << "Available regs:");
   for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
@@ -766,7 +773,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
 
     // Process the defs in MI...
     PrescanInstruction(MI, Count, PassthruRegs);
-    
+
     // The dependence edges that represent anti- and output-
     // dependencies that are candidates for breaking.
     std::vector<SDep*> Edges;
@@ -779,7 +786,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
     if (MI == CriticalPathMI) {
       CriticalPathSU = CriticalPathStep(CriticalPathSU);
       CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
-    } else { 
+    } else {
       ExcludeRegs = &CriticalPathSet;
     }
 
@@ -790,14 +797,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
       for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
         SDep *Edge = Edges[i];
         SUnit *NextSU = Edge->getSUnit();
-        
+
         if ((Edge->getKind() != SDep::Anti) &&
             (Edge->getKind() != SDep::Output)) continue;
-        
+
         unsigned AntiDepReg = Edge->getReg();
         DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
         assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
-        
+
         if (!AllocatableSet.test(AntiDepReg)) {
           // Don't break anti-dependencies on non-allocatable registers.
           DEBUG(dbgs() << " (non-allocatable)\n");
@@ -816,12 +823,13 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
         } else {
           // No anti-dep breaking for implicit deps
           MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
-          assert(AntiDepOp != NULL && "Can't find index for defined register operand");
+          assert(AntiDepOp != NULL &&
+                 "Can't find index for defined register operand");
           if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
             DEBUG(dbgs() << " (implicit)\n");
             continue;
           }
-          
+
           // If the SUnit has other dependencies on the SUnit that
           // it anti-depends on, don't bother breaking the
           // anti-dependency since those edges would prevent such
@@ -847,58 +855,59 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
               DEBUG(dbgs() << " (real dependency)\n");
               AntiDepReg = 0;
               break;
-            } else if ((P->getSUnit() != NextSU) && 
-                       (P->getKind() == SDep::Data) && 
+            } else if ((P->getSUnit() != NextSU) &&
+                       (P->getKind() == SDep::Data) &&
                        (P->getReg() == AntiDepReg)) {
               DEBUG(dbgs() << " (other dependency)\n");
               AntiDepReg = 0;
               break;
             }
           }
-          
+
           if (AntiDepReg == 0) continue;
         }
-        
+
         assert(AntiDepReg != 0);
         if (AntiDepReg == 0) continue;
-        
+
         // Determine AntiDepReg's register group.
         const unsigned GroupIndex = State->GetGroup(AntiDepReg);
         if (GroupIndex == 0) {
           DEBUG(dbgs() << " (zero group)\n");
           continue;
         }
-        
+
         DEBUG(dbgs() << '\n');
-        
+
         // Look for a suitable register to use to break the anti-dependence.
         std::map<unsigned, unsigned> RenameMap;
         if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
           DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
                 << TRI->getName(AntiDepReg) << ":");
-          
+
           // Handle each group register...
           for (std::map<unsigned, unsigned>::iterator
                  S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
             unsigned CurrReg = S->first;
             unsigned NewReg = S->second;
-            
-            DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << 
-                  TRI->getName(NewReg) << "(" <<  
+
+            DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+                  TRI->getName(NewReg) << "(" <<
                   RegRefs.count(CurrReg) << " refs)");
-            
+
             // Update the references to the old register CurrReg to
             // refer to the new register NewReg.
-            std::pair<std::multimap<unsigned, 
-                              AggressiveAntiDepState::RegisterReference>::iterator,
+            std::pair<std::multimap<unsigned,
+                           AggressiveAntiDepState::RegisterReference>::iterator,
                       std::multimap<unsigned,
-                              AggressiveAntiDepState::RegisterReference>::iterator>
+                           AggressiveAntiDepState::RegisterReference>::iterator>
               Range = RegRefs.equal_range(CurrReg);
-            for (std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>::iterator
+            for (std::multimap<unsigned,
+                 AggressiveAntiDepState::RegisterReference>::iterator
                    Q = Range.first, QE = Range.second; Q != QE; ++Q) {
               Q->second.Operand->setReg(NewReg);
             }
-            
+
             // We just went back in time and modified history; the
             // liveness information for CurrReg is now inconsistent. Set
             // the state as if it were dead.
@@ -906,7 +915,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
             RegRefs.erase(NewReg);
             DefIndices[NewReg] = DefIndices[CurrReg];
             KillIndices[NewReg] = KillIndices[CurrReg];
-            
+
             State->UnionGroups(CurrReg, 0);
             RegRefs.erase(CurrReg);
             DefIndices[CurrReg] = KillIndices[CurrReg];
@@ -915,7 +924,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
                     (DefIndices[CurrReg] == ~0u)) &&
                    "Kill and Def maps aren't consistent for AntiDepReg!");
           }
-          
+
           ++Broken;
           DEBUG(dbgs() << '\n');
         }
@@ -924,6 +933,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
 
     ScanInstruction(MI, Count);
   }
-  
+
   return Broken;
 }
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index d385a21..a62d68c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -30,7 +30,7 @@
 #include <map>
 
 namespace llvm {
-  /// Class AggressiveAntiDepState 
+  /// Class AggressiveAntiDepState
   /// Contains all the state necessary for anti-dep breaking.
   class AggressiveAntiDepState {
   public:
@@ -54,27 +54,27 @@ namespace llvm {
     /// is the parent of a group, or point to another node to indicate
     /// that it is a member of the same group as that node.
     std::vector<unsigned> GroupNodes;
-  
+
     /// GroupNodeIndices - For each register, the index of the GroupNode
     /// currently representing the group that the register belongs to.
     /// Register 0 is always represented by the 0 group, a group
     /// composed of registers that are not eligible for anti-aliasing.
     unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
-  
+
     /// RegRefs - Map registers to all their references within a live range.
     std::multimap<unsigned, RegisterReference> RegRefs;
-  
+
     /// KillIndices - The index of the most recent kill (proceding bottom-up),
     /// or ~0u if the register is not live.
     unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
-  
+
     /// DefIndices - The index of the most recent complete def (proceding bottom
     /// up), or ~0u if the register is live.
     unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
 
   public:
     AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
-    
+
     /// GetKillIndices - Return the kill indices.
     unsigned *GetKillIndices() { return KillIndices; }
 
@@ -87,13 +87,14 @@ namespace llvm {
     // GetGroup - Get the group for a register. The returned value is
     // the index of the GroupNode representing the group.
     unsigned GetGroup(unsigned Reg);
-    
+
     // GetGroupRegs - Return a vector of the registers belonging to a
     // group. If RegRefs is non-NULL then only included referenced registers.
     void GetGroupRegs(
        unsigned Group,
        std::vector<unsigned> &Regs,
-       std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs);
+       std::multimap<unsigned,
+         AggressiveAntiDepState::RegisterReference> *RegRefs);
 
     // UnionGroups - Union Reg1's and Reg2's groups to form a new
     // group. Return the index of the GroupNode representing the
@@ -110,7 +111,7 @@ namespace llvm {
   };
 
 
-  /// Class AggressiveAntiDepBreaker 
+  /// Class AggressiveAntiDepBreaker
   class AggressiveAntiDepBreaker : public AntiDepBreaker {
     MachineFunction& MF;
     MachineRegisterInfo &MRI;
@@ -130,14 +131,15 @@ namespace llvm {
     AggressiveAntiDepState *State;
 
   public:
-    AggressiveAntiDepBreaker(MachineFunction& MFi, 
+    AggressiveAntiDepBreaker(MachineFunction& MFi,
                              TargetSubtarget::RegClassVector& CriticalPathRCs);
     ~AggressiveAntiDepBreaker();
-    
+
     /// Start - Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB);
 
-    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
+    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
+    /// path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
@@ -160,7 +162,7 @@ namespace llvm {
     /// IsImplicitDefUse - Return true if MO represents a register
     /// that is both implicitly used and defined in MI
     bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
-    
+
     /// GetPassthruRegs - If MI implicitly def/uses a register, then
     /// return that register and all subregisters.
     void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 6b24e24..876f628 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -807,124 +807,145 @@ void AsmPrinter::EmitZeros(uint64_t NumZeros, unsigned AddrSpace) const {
 // Print out the specified constant, without a storage class.  Only the
 // constants valid in constant expressions can occur here.
 void AsmPrinter::EmitConstantValueOnly(const Constant *CV) {
-  if (CV->isNullValue() || isa<UndefValue>(CV))
+  if (CV->isNullValue() || isa<UndefValue>(CV)) {
     O << '0';
-  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    return;
+  }
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
     O << CI->getZExtValue();
-  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    return;
+  }
+  
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
     // This is a constant address for a global variable or function. Use the
     // name of the variable or function as the address value.
     O << Mang->getMangledName(GV);
-  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    return;
+  }
+  
+  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+    GetBlockAddressSymbol(BA)->print(O, MAI);
+    return;
+  }
+  
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+  if (CE == 0) {
+    llvm_unreachable("Unknown constant value!");
+    O << '0';
+    return;
+  }
+  
+  switch (CE->getOpcode()) {
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  default:
+    llvm_unreachable("FIXME: Don't support this constant cast expr");
+  case Instruction::GetElementPtr: {
+    // generate a symbolic expression for the byte address
     const TargetData *TD = TM.getTargetData();
-    unsigned Opcode = CE->getOpcode();    
-    switch (Opcode) {
-    case Instruction::Trunc:
-    case Instruction::ZExt:
-    case Instruction::SExt:
-    case Instruction::FPTrunc:
-    case Instruction::FPExt:
-    case Instruction::UIToFP:
-    case Instruction::SIToFP:
-    case Instruction::FPToUI:
-    case Instruction::FPToSI:
-      llvm_unreachable("FIXME: Don't support this constant cast expr");
-    case Instruction::GetElementPtr: {
-      // generate a symbolic expression for the byte address
-      const Constant *ptrVal = CE->getOperand(0);
-      SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
-      if (int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
-                                                idxVec.size())) {
-        // Truncate/sext the offset to the pointer size.
-        if (TD->getPointerSizeInBits() != 64) {
-          int SExtAmount = 64-TD->getPointerSizeInBits();
-          Offset = (Offset << SExtAmount) >> SExtAmount;
-        }
-        
-        if (Offset)
-          O << '(';
-        EmitConstantValueOnly(ptrVal);
-        if (Offset > 0)
-          O << ") + " << Offset;
-        else if (Offset < 0)
-          O << ") - " << -Offset;
-      } else {
-        EmitConstantValueOnly(ptrVal);
-      }
-      break;
+    const Constant *ptrVal = CE->getOperand(0);
+    SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+    int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+                                          idxVec.size());
+    if (Offset == 0)
+      return EmitConstantValueOnly(ptrVal);
+    
+    // Truncate/sext the offset to the pointer size.
+    if (TD->getPointerSizeInBits() != 64) {
+      int SExtAmount = 64-TD->getPointerSizeInBits();
+      Offset = (Offset << SExtAmount) >> SExtAmount;
     }
-    case Instruction::BitCast:
-      return EmitConstantValueOnly(CE->getOperand(0));
-
-    case Instruction::IntToPtr: {
-      // Handle casts to pointers by changing them into casts to the appropriate
-      // integer type.  This promotes constant folding and simplifies this code.
-      Constant *Op = CE->getOperand(0);
-      Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
-                                        false/*ZExt*/);
+    
+    if (Offset)
+      O << '(';
+    EmitConstantValueOnly(ptrVal);
+    if (Offset > 0)
+      O << ") + " << Offset;
+    else
+      O << ") - " << -Offset;
+    return;
+  }
+  case Instruction::BitCast:
+    return EmitConstantValueOnly(CE->getOperand(0));
+
+  case Instruction::IntToPtr: {
+    // Handle casts to pointers by changing them into casts to the appropriate
+    // integer type.  This promotes constant folding and simplifies this code.
+    const TargetData *TD = TM.getTargetData();
+    Constant *Op = CE->getOperand(0);
+    Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+                                      false/*ZExt*/);
+    return EmitConstantValueOnly(Op);
+  }
+    
+  case Instruction::PtrToInt: {
+    // Support only foldable casts to/from pointers that can be eliminated by
+    // changing the pointer to the appropriately sized integer type.
+    Constant *Op = CE->getOperand(0);
+    const Type *Ty = CE->getType();
+    const TargetData *TD = TM.getTargetData();
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot greater or equal to the size of the pointer.
+    if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
       return EmitConstantValueOnly(Op);
-    }
-      
+
+    O << "((";
+    EmitConstantValueOnly(Op);
+    APInt ptrMask =
+      APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType()));
+    
+    SmallString<40> S;
+    ptrMask.toStringUnsigned(S);
+    O << ") & " << S.str() << ')';
+    return;
+  }
       
-    case Instruction::PtrToInt: {
-      // Support only foldable casts to/from pointers that can be eliminated by
-      // changing the pointer to the appropriately sized integer type.
-      Constant *Op = CE->getOperand(0);
-      const Type *Ty = CE->getType();
-
-      // We can emit the pointer value into this slot if the slot is an
-      // integer slot greater or equal to the size of the pointer.
-      if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
-        return EmitConstantValueOnly(Op);
-
-      O << "((";
-      EmitConstantValueOnly(Op);
-      APInt ptrMask =
-        APInt::getAllOnesValue(TD->getTypeAllocSizeInBits(Op->getType()));
+  case Instruction::Trunc:
+    // We emit the value and depend on the assembler to truncate the generated
+    // expression properly.  This is important for differences between
+    // blockaddress labels.  Since the two labels are in the same function, it
+    // is reasonable to treat their delta as a 32-bit value.
+    return EmitConstantValueOnly(CE->getOperand(0));
       
-      SmallString<40> S;
-      ptrMask.toStringUnsigned(S);
-      O << ") & " << S.str() << ')';
-      break;
-    }
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    O << '(';
+    EmitConstantValueOnly(CE->getOperand(0));
+    O << ')';
+    switch (CE->getOpcode()) {
     case Instruction::Add:
+     O << " + ";
+     break;
     case Instruction::Sub:
+     O << " - ";
+     break;
     case Instruction::And:
+     O << " & ";
+     break;
     case Instruction::Or:
+     O << " | ";
+     break;
     case Instruction::Xor:
-      O << '(';
-      EmitConstantValueOnly(CE->getOperand(0));
-      O << ')';
-      switch (Opcode) {
-      case Instruction::Add:
-       O << " + ";
-       break;
-      case Instruction::Sub:
-       O << " - ";
-       break;
-      case Instruction::And:
-       O << " & ";
-       break;
-      case Instruction::Or:
-       O << " | ";
-       break;
-      case Instruction::Xor:
-       O << " ^ ";
-       break;
-      default:
-       break;
-      }
-      O << '(';
-      EmitConstantValueOnly(CE->getOperand(1));
-      O << ')';
-      break;
+     O << " ^ ";
+     break;
     default:
-      llvm_unreachable("Unsupported operator!");
+     break;
     }
-  } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
-    GetBlockAddressSymbol(BA)->print(O, MAI);
-  } else {
-    llvm_unreachable("Unknown constant value!");
+    O << '(';
+    EmitConstantValueOnly(CE->getOperand(1));
+    O << ')';
+    break;
   }
 }
 
@@ -1225,8 +1246,7 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
                                             unsigned AddrSpace) {
   const TargetData *TD = TM.getTargetData();
   unsigned BitWidth = CI->getBitWidth();
-  assert(isPowerOf2_32(BitWidth) &&
-         "Non-power-of-2-sized integers not handled!");
+  assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
 
   // We don't expect assemblers to support integer data directives
   // for more than 64 bits, so we emit the data in at most 64-bit
@@ -1239,39 +1259,34 @@ void AsmPrinter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
     else
       Val = RawData[i];
 
-    if (MAI->getData64bitsDirective(AddrSpace))
+    if (MAI->getData64bitsDirective(AddrSpace)) {
       O << MAI->getData64bitsDirective(AddrSpace) << Val << '\n';
-    else if (TD->isBigEndian()) {
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " most significant half of i64 " << Val;
-      }
-      O << '\n';
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " least significant half of i64 " << Val;
-      }
-      O << '\n';
-    } else {
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " least significant half of i64 " << Val;
-      }
-      O << '\n';
-      O << MAI->getData32bitsDirective(AddrSpace) << unsigned(Val >> 32);
-      if (VerboseAsm) {
-        O.PadToColumn(MAI->getCommentColumn());
-        O << MAI->getCommentString()
-          << " most significant half of i64 " << Val;
-      }
-      O << '\n';
+      continue;
     }
+
+    // Emit two 32-bit chunks, order depends on endianness.
+    unsigned FirstChunk = unsigned(Val), SecondChunk = unsigned(Val >> 32);
+    const char *FirstName = " least", *SecondName = " most";
+    if (TD->isBigEndian()) {
+      std::swap(FirstChunk, SecondChunk);
+      std::swap(FirstName, SecondName);
+    }
+    
+    O << MAI->getData32bitsDirective(AddrSpace) << FirstChunk;
+    if (VerboseAsm) {
+      O.PadToColumn(MAI->getCommentColumn());
+      O << MAI->getCommentString()
+        << FirstName << " significant half of i64 " << Val;
+    }
+    O << '\n';
+    
+    O << MAI->getData32bitsDirective(AddrSpace) << SecondChunk;
+    if (VerboseAsm) {
+      O.PadToColumn(MAI->getCommentColumn());
+      O << MAI->getCommentString()
+        << SecondName << " significant half of i64 " << Val;
+    }
+    O << '\n';
   }
 }
 
@@ -1284,22 +1299,39 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
   if (CV->isNullValue() || isa<UndefValue>(CV)) {
     EmitZeros(Size, AddrSpace);
     return;
-  } else if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+  }
+  
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
     EmitGlobalConstantArray(CVA , AddrSpace);
     return;
-  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+  }
+  
+  if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
     EmitGlobalConstantStruct(CVS, AddrSpace);
     return;
-  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+  }
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
     EmitGlobalConstantFP(CFP, AddrSpace);
     return;
-  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+  }
+  
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    // If we can directly emit an 8-byte constant, do it.
+    if (Size == 8)
+      if (const char *Data64Dir = MAI->getData64bitsDirective(AddrSpace)) {
+        O << Data64Dir << CI->getZExtValue() << '\n';
+        return;
+      }
+
     // Small integers are handled below; large integers are handled here.
     if (Size > 4) {
       EmitGlobalConstantLargeInt(CI, AddrSpace);
       return;
     }
-  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+  }
+  
+  if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
     EmitGlobalConstantVector(CP);
     return;
   }
@@ -1617,7 +1649,7 @@ void AsmPrinter::printLabel(unsigned Id) const {
 
 /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
 /// instruction, using the specified assembler variant.  Targets should
-/// overried this to format as appropriate.
+/// override this to format as appropriate.
 bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                  unsigned AsmVariant, const char *ExtraCode) {
   // Target doesn't support this yet!
@@ -1645,15 +1677,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
   // This code must use the function name itself, and not the function number,
   // since it must be possible to generate the label name from within other
   // functions.
-  std::string FuncName = Mang->getMangledName(F);
+  SmallString<60> FnName;
+  Mang->getNameWithPrefix(FnName, F, false);
 
-  SmallString<60> Name;
-  raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
-    << FuncName.size() << '_' << FuncName << '_'
-    << Mang->makeNameProper(BB->getName())
-    << Suffix;
+  // FIXME: THIS IS BROKEN IF THE LLVM BASIC BLOCK DOESN'T HAVE A NAME!
+  SmallString<60> NameResult;
+  Mang->getNameWithPrefix(NameResult,
+                          StringRef("BA") + Twine((unsigned)FnName.size()) + 
+                          "_" + FnName.str() + "_" + BB->getName() + Suffix, 
+                          Mangler::Private);
 
-  return OutContext.GetOrCreateSymbol(Name.str());
+  return OutContext.GetOrCreateSymbol(NameResult.str());
 }
 
 MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8a3ceb6..15f37ae 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -212,19 +212,30 @@ public:
   ///
   void addVariable(DbgVariable *V) { Variables.push_back(V); }
 
-  void fixInstructionMarkers() {
+  void fixInstructionMarkers(DenseMap<const MachineInstr *, 
+                             unsigned> &MIIndexMap) {
     assert (getFirstInsn() && "First instruction is missing!");
-    if (getLastInsn())
-      return;
-
-    // If a scope does not have an instruction to mark an end then use
-    // the end of last child scope.
+    
+    // Use the end of last child scope as end of this scope.
     SmallVector<DbgScope *, 4> &Scopes = getScopes();
-    assert (!Scopes.empty() && "Inner most scope does not have last insn!");
-    DbgScope *L = Scopes.back();
-    if (!L->getLastInsn())
-      L->fixInstructionMarkers();
-    setLastInsn(L->getLastInsn());
+    const MachineInstr *LastInsn = getFirstInsn();
+    unsigned LIndex = 0;
+    if (Scopes.empty()) {
+      assert (getLastInsn() && "Inner most scope does not have last insn!");
+      return;
+    }
+    for (SmallVector<DbgScope *, 4>::iterator SI = Scopes.begin(),
+           SE = Scopes.end(); SI != SE; ++SI) {
+      DbgScope *DS = *SI;
+      DS->fixInstructionMarkers(MIIndexMap);
+      const MachineInstr *DSLastInsn = DS->getLastInsn();
+      unsigned DSI = MIIndexMap[DSLastInsn];
+      if (DSI > LIndex) {
+        LastInsn = DSLastInsn;
+        LIndex = DSI;
+      }
+    }
+    setLastInsn(LastInsn);
   }
 
 #ifndef NDEBUG
@@ -1021,6 +1032,16 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
   return Enumerator;
 }
 
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm 
+/// printer to not emit usual symbol prefix before the symbol name is used then
+/// return linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+  char One = '\1';
+  if (LinkageName.startswith(StringRef(&One, 1)))
+    return LinkageName.substr(1);
+  return LinkageName;
+}
+
 /// createGlobalVariableDIE - Create new DIE using GV.
 DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
   // If the global variable was optmized out then no need to create debug info
@@ -1033,16 +1054,10 @@ DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
             GV.getDisplayName());
 
   StringRef LinkageName = GV.getLinkageName();
-  if (!LinkageName.empty()) {
-    // Skip special LLVM prefix that is used to inform the asm printer to not
-    // emit usual symbol prefix before the symbol name. This happens for
-    // Objective-C symbol names and symbol whose name is replaced using GCC's
-    // __asm__ attribute.
-    if (LinkageName[0] == 1)
-      LinkageName = LinkageName.substr(1);
+  if (!LinkageName.empty())
     addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              LinkageName);
-  }
+              getRealLinkageName(LinkageName));
+
   addType(GVDie, GV.getType());
   if (!GV.isLocalToUnit())
     addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
@@ -1074,10 +1089,9 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
     addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
 
     uint64_t Offset = DT.getOffsetInBits();
-    uint64_t FieldOffset = Offset;
     uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
     uint64_t HiMark = (Offset + FieldSize) & AlignMask;
-    FieldOffset = (HiMark - FieldSize);
+    uint64_t FieldOffset = (HiMark - FieldSize);
     Offset -= FieldOffset;
 
     // Maybe we need to work from the other end.
@@ -1119,16 +1133,10 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
   addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
 
   StringRef LinkageName = SP.getLinkageName();
-  if (!LinkageName.empty()) {
-    // Skip special LLVM prefix that is used to inform the asm printer to not
-    // emit usual symbol prefix before the symbol name. This happens for
-    // Objective-C symbol names and symbol whose name is replaced using GCC's
-    // __asm__ attribute.
-    if (LinkageName[0] == 1)
-      LinkageName = LinkageName.substr(1);
+  if (!LinkageName.empty())
     addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
-              LinkageName);
-  }
+              getRealLinkageName(LinkageName));
+
   addSourceLine(SPDie, &SP);
 
   // Add prototyped tag, if C or ObjC.
@@ -1382,7 +1390,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
     I->second.push_back(std::make_pair(StartID, ScopeDIE));
 
   StringPool.insert(InlinedSP.getName());
-  StringPool.insert(InlinedSP.getLinkageName());
+  StringPool.insert(getRealLinkageName(InlinedSP.getLinkageName()));
+
   DILocation DL(Scope->getInlinedAt());
   addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
   addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
@@ -1644,8 +1653,11 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   ModuleCU->insertDIE(N, VariableDie);
 
   // Add to context owner.
-  if (DI_GV.isDefinition() 
-      && !DI_GV.getContext().isCompileUnit()) {
+  DIDescriptor GVContext = DI_GV.getContext();
+  // Do not create specification DIE if context is either compile unit
+  // or a subprogram.
+  if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
+      && !GVContext.isSubprogram()) {
     // Create specification DIE.
     DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
     addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1663,7 +1675,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
                    Asm->Mang->getMangledName(DI_GV.getGlobal()));
     addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
   }
-  addToContextOwner(VariableDie, DI_GV.getContext());
+  addToContextOwner(VariableDie, GVContext);
   
   // Expose as global. FIXME - need to check external flag.
   ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
@@ -1804,7 +1816,8 @@ void DwarfDebug::endModule() {
     DIE *NDie = ModuleCU->getDIE(N);
     if (!NDie) continue;
     addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
-    addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    // FIXME - This is not the correct approach.
+    // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
   }
 
   // Standard sections final addresses.
@@ -1976,12 +1989,15 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
   if (!DbgScopeMap.empty())
     return false;
 
+  DenseMap<const MachineInstr *, unsigned> MIIndexMap;
+  unsigned MIIndex = 0;
   // Scan each instruction and create scopes. First build working set of scopes.
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
        I != E; ++I) {
     for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
          II != IE; ++II) {
       const MachineInstr *MInsn = II;
+      MIIndexMap[MInsn] = MIIndex++;
       DebugLoc DL = MInsn->getDebugLoc();
       if (DL.isUnknown()) continue;
       DebugLocTuple DLT = MF->getDebugLocTuple(DL);
@@ -2014,16 +2030,10 @@ bool DwarfDebug::extractScopeInformation(MachineFunction *MF) {
     }
   }
 
-  // If a scope's last instruction is not set then use its child scope's
-  // last instruction as this scope's last instrunction.
-  for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
-	 DE = DbgScopeMap.end(); DI != DE; ++DI) {
-    if (DI->second->isAbstractScope())
-      continue;
-    assert (DI->second->getFirstInsn() && "Invalid first instruction!");
-    DI->second->fixInstructionMarkers();
-    assert (DI->second->getLastInsn() && "Invalid last instruction!");
-  }
+  if (!CurrentFnDbgScope)
+    return false;
+
+  CurrentFnDbgScope->fixInstructionMarkers(MIIndexMap);
 
   // Each scope has first instruction and last instruction to mark beginning
   // and end of a scope respectively. Create an inverse map that list scopes
@@ -2105,38 +2115,41 @@ void DwarfDebug::endFunction(MachineFunction *MF) {
   if (DbgScopeMap.empty())
     return;
 
-  // Define end label for subprogram.
-  EmitLabel("func_end", SubprogramCount);
-
-  // Get function line info.
-  if (!Lines.empty()) {
-    // Get section line info.
-    unsigned ID = SectionMap.insert(Asm->getCurrentSection());
-    if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
-    std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
-    // Append the function info to section info.
-    SectionLineInfos.insert(SectionLineInfos.end(),
-                            Lines.begin(), Lines.end());
+  if (CurrentFnDbgScope) {
+    // Define end label for subprogram.
+    EmitLabel("func_end", SubprogramCount);
+    
+    // Get function line info.
+    if (!Lines.empty()) {
+      // Get section line info.
+      unsigned ID = SectionMap.insert(Asm->getCurrentSection());
+      if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
+      std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
+      // Append the function info to section info.
+      SectionLineInfos.insert(SectionLineInfos.end(),
+                              Lines.begin(), Lines.end());
+    }
+    
+    // Construct abstract scopes.
+    for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+           AE = AbstractScopesList.end(); AI != AE; ++AI)
+      constructScopeDIE(*AI);
+    
+    constructScopeDIE(CurrentFnDbgScope);
+    
+    DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
+                                                 MMI->getFrameMoves()));
   }
 
-  // Construct abstract scopes.
-  for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
-         AE = AbstractScopesList.end(); AI != AE; ++AI)
-    constructScopeDIE(*AI);
-
-  constructScopeDIE(CurrentFnDbgScope);
-
-  DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
-                                               MMI->getFrameMoves()));
-
   // Clear debug info
-  CurrentFnDbgScope = NULL;
-  DbgScopeMap.clear();
-  DbgScopeBeginMap.clear();
-  DbgScopeEndMap.clear();
-  ConcreteScopes.clear();
-  AbstractScopesList.clear();
-
+  if (CurrentFnDbgScope) {
+    CurrentFnDbgScope = NULL;
+    DbgScopeMap.clear();
+    DbgScopeBeginMap.clear();
+    DbgScopeEndMap.clear();
+    ConcreteScopes.clear();
+    AbstractScopesList.clear();
+  }
   Lines.clear();
   
   if (TimePassesIsEnabled)
@@ -2908,8 +2921,6 @@ void DwarfDebug::emitDebugInlineInfo() {
   for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
 
-//  for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
-    //        I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
     MDNode *Node = *I;
     ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
       = InlineInfo.find(Node);
@@ -2920,20 +2931,11 @@ void DwarfDebug::emitDebugInlineInfo() {
 
     if (LName.empty())
       Asm->EmitString(Name);
-    else {
-      // Skip special LLVM prefix that is used to inform the asm printer to not
-      // emit usual symbol prefix before the symbol name. This happens for
-      // Objective-C symbol names and symbol whose name is replaced using GCC's
-      // __asm__ attribute.
-      if (LName[0] == 1)
-        LName = LName.substr(1);
-//      Asm->EmitString(LName);
+    else 
       EmitSectionOffset("string", "section_str",
-                        StringPool.idFor(LName), false, true);
+                        StringPool.idFor(getRealLinkageName(LName)), false, true);
 
-    }
     Asm->EOL("MIPS linkage name");
-//    Asm->EmitString(Name);
     EmitSectionOffset("string", "section_str",
                       StringPool.idFor(Name), false, true);
     Asm->EOL("Function name");
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 7a969f0..6bc808c 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -35,12 +35,13 @@ add_llvm_library(LLVMCodeGen
   MachineModuleInfoImpls.cpp
   MachinePassRegistry.cpp
   MachineRegisterInfo.cpp
-  MachineSink.cpp
   MachineSSAUpdater.cpp
+  MachineSink.cpp
   MachineVerifier.cpp
   MaxStackAlignment.cpp
   ObjectCodeEmitter.cpp
   OcamlGC.cpp
+  OptimizeExts.cpp
   PHIElimination.cpp
   Passes.cpp
   PostRASchedulerList.cpp
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 3c7961c2..056e2d5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -288,9 +288,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
 }
 
 unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
+CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+                                                 unsigned AntiDepReg,
                                                  unsigned LastNewReg,
-                                                 const TargetRegisterClass *RC) {
+                                                 const TargetRegisterClass *RC)
+{
   for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
        RE = RC->allocation_order_end(MF); R != RE; ++R) {
     unsigned NewReg = *R;
@@ -300,12 +302,16 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg,
     // an anti-dependence with this AntiDepReg, because that would
     // re-introduce that anti-dependence.
     if (NewReg == LastNewReg) continue;
+    // If the instruction already has a def of the NewReg, it's not suitable.
+    // For example, Instruction with multiple definitions can result in this
+    // condition.
+    if (MI->modifiesRegister(NewReg, TRI)) continue;
     // If NewReg is dead and NewReg's most recent def is not before
     // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
-    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) &&
-           "Kill and Def maps aren't consistent for AntiDepReg!");
-    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) &&
-           "Kill and Def maps aren't consistent for NewReg!");
+    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+           && "Kill and Def maps aren't consistent for AntiDepReg!");
+    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+           && "Kill and Def maps aren't consistent for NewReg!");
     if (KillIndices[NewReg] != ~0u ||
         Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
         KillIndices[AntiDepReg] > DefIndices[NewReg])
@@ -336,14 +342,14 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
 
 #ifndef NDEBUG
   {
-    DEBUG(errs() << "Critical path has total latency "
+    DEBUG(dbgs() << "Critical path has total latency "
           << (Max->getDepth() + Max->Latency) << "\n");
-    DEBUG(errs() << "Available regs:");
+    DEBUG(dbgs() << "Available regs:");
     for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
       if (KillIndices[Reg] == ~0u)
-        DEBUG(errs() << " " << TRI->getName(Reg));
+        DEBUG(dbgs() << " " << TRI->getName(Reg));
     }
-    DEBUG(errs() << '\n');
+    DEBUG(dbgs() << '\n');
   }
 #endif
 
@@ -495,10 +501,10 @@ BreakAntiDependencies(std::vector<SUnit>& SUnits,
     // TODO: Instead of picking the first free register, consider which might
     // be the best.
     if (AntiDepReg != 0) {
-      if (unsigned NewReg = findSuitableFreeRegister(AntiDepReg,
+      if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
                                                      LastNewReg[AntiDepReg],
                                                      RC)) {
-        DEBUG(errs() << "Breaking anti-dependence edge on "
+        DEBUG(dbgs() << "Breaking anti-dependence edge on "
               << TRI->getName(AntiDepReg)
               << " with " << RegRefs.count(AntiDepReg) << " references"
               << " using " << TRI->getName(NewReg) << "!\n");
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 496888d..9e8db02 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -64,11 +64,12 @@ namespace llvm {
   public:
     CriticalAntiDepBreaker(MachineFunction& MFi);
     ~CriticalAntiDepBreaker();
-    
+
     /// Start - Initialize anti-dep breaking for a new basic block.
     void StartBlock(MachineBasicBlock *BB);
 
-    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
+    /// BreakAntiDependencies - Identifiy anti-dependencies along the critical
+    /// path
     /// of the ScheduleDAG and break them by renaming registers.
     ///
     unsigned BreakAntiDependencies(std::vector<SUnit>& SUnits,
@@ -87,7 +88,8 @@ namespace llvm {
   private:
     void PrescanInstruction(MachineInstr *MI);
     void ScanInstruction(MachineInstr *MI, unsigned Count);
-    unsigned findSuitableFreeRegister(unsigned AntiDepReg,
+    unsigned findSuitableFreeRegister(MachineInstr *MI,
+                                      unsigned AntiDepReg,
                                       unsigned LastNewReg,
                                       const TargetRegisterClass *);
   };
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 07a5d38..0982eab 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -109,7 +109,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
 
       // If the instruction is dead, delete it!
       if (isDead(MI)) {
-        DEBUG(errs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+        DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
         AnyChanges = true;
         MI->eraseFromParent();
         MIE = MBB->rend();
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index a6429f7..11a85a0 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -37,7 +37,7 @@ namespace llvm {
 /// startFunction - This callback is invoked when a new machine function is
 /// about to be emitted.
 void ELFCodeEmitter::startFunction(MachineFunction &MF) {
-  DEBUG(errs() << "processing function: "
+  DEBUG(dbgs() << "processing function: "
         << MF.getFunction()->getName() << "\n");
 
   // Get the ELF Section that this function belongs in.
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 3e1ee11..5e5f589 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -1076,7 +1076,7 @@ void ELFWriter::OutputSectionsAndSectionTable() {
   // Emit all of sections to the file and build the section header table.
   for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
     ELFSection &S = *(*I);
-    DEBUG(errs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
+    DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
                  << ", Size: " << S.Size << ", Offset: " << S.Offset
                  << ", SectionData Size: " << S.size() << "\n");
 
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp
index 36925b1..266c74c 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/ExactHazardRecognizer.cpp
@@ -48,7 +48,7 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
   Scoreboard = new unsigned[ScoreboardDepth];
   ScoreboardHead = 0;
 
-  DEBUG(errs() << "Using exact hazard recognizer: ScoreboardDepth = " 
+  DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " 
                << ScoreboardDepth << '\n');
 }
 
@@ -66,7 +66,7 @@ unsigned ExactHazardRecognizer::getFutureIndex(unsigned offset) {
 }
 
 void ExactHazardRecognizer::dumpScoreboard() {
-  errs() << "Scoreboard:\n";
+  dbgs() << "Scoreboard:\n";
   
   unsigned last = ScoreboardDepth - 1;
   while ((last > 0) && (Scoreboard[getFutureIndex(last)] == 0))
@@ -74,10 +74,10 @@ void ExactHazardRecognizer::dumpScoreboard() {
 
   for (unsigned i = 0; i <= last; i++) {
     unsigned FUs = Scoreboard[getFutureIndex(i)];
-    errs() << "\t";
+    dbgs() << "\t";
     for (int j = 31; j >= 0; j--)
-      errs() << ((FUs & (1 << j)) ? '1' : '0');
-    errs() << '\n';
+      dbgs() << ((FUs & (1 << j)) ? '1' : '0');
+    dbgs() << '\n';
   }
 }
 
@@ -102,8 +102,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
       unsigned index = getFutureIndex(cycle + i);
       unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
       if (!freeUnits) {
-        DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
-        DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
+        DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
+        DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
         DEBUG(SU->getInstr()->dump());
         return Hazard;
       }
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 4d25dcc..055172b 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -17,6 +17,7 @@
 #include "llvm/Pass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -92,7 +93,7 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
     }
   }
  
-  errs() << "unsupported GC: " << Name << "\n";
+  dbgs() << "unsupported GC: " << Name << "\n";
   llvm_unreachable(0);
 }
 
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6e0bde6..79b2986 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -109,7 +110,7 @@ GCStrategy::~GCStrategy() {
 bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
  
 bool GCStrategy::performCustomLowering(Function &F) {
-  errs() << "gc " << getName() << " must override performCustomLowering.\n";
+  dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
   llvm_unreachable(0);
   return 0;
 }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c23d707..c61fd17 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -229,14 +229,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
   if (!TII) return false;
 
-  DEBUG(errs() << "\nIfcvt: function (" << ++FnNum <<  ") \'"
+  DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum <<  ") \'"
                << MF.getFunction()->getName() << "\'");
 
   if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
-    DEBUG(errs() << " skipped\n");
+    DEBUG(dbgs() << " skipped\n");
     return false;
   }
-  DEBUG(errs() << "\n");
+  DEBUG(dbgs() << "\n");
 
   MF.RenumberBlocks();
   BBAnalysis.resize(MF.getNumBlockIDs());
@@ -281,13 +281,13 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
       case ICSimpleFalse: {
         bool isFalse = Kind == ICSimpleFalse;
         if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
-        DEBUG(errs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+        DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
                      << "): BB#" << BBI.BB->getNumber() << " ("
                      << ((Kind == ICSimpleFalse)
                          ? BBI.FalseBB->getNumber()
                          : BBI.TrueBB->getNumber()) << ") ");
         RetVal = IfConvertSimple(BBI, Kind);
-        DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) {
           if (isFalse) NumSimpleFalse++;
           else         NumSimple++;
@@ -304,16 +304,16 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
         if (DisableTriangleR && !isFalse && isRev) break;
         if (DisableTriangleF && isFalse && !isRev) break;
         if (DisableTriangleFR && isFalse && isRev) break;
-        DEBUG(errs() << "Ifcvt (Triangle");
+        DEBUG(dbgs() << "Ifcvt (Triangle");
         if (isFalse)
-          DEBUG(errs() << " false");
+          DEBUG(dbgs() << " false");
         if (isRev)
-          DEBUG(errs() << " rev");
-        DEBUG(errs() << "): BB#" << BBI.BB->getNumber() << " (T:"
+          DEBUG(dbgs() << " rev");
+        DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:"
                      << BBI.TrueBB->getNumber() << ",F:"
                      << BBI.FalseBB->getNumber() << ") ");
         RetVal = IfConvertTriangle(BBI, Kind);
-        DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) {
           if (isFalse) {
             if (isRev) NumTriangleFRev++;
@@ -327,11 +327,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
       }
       case ICDiamond: {
         if (DisableDiamond) break;
-        DEBUG(errs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
+        DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
                      << BBI.TrueBB->getNumber() << ",F:"
                      << BBI.FalseBB->getNumber() << ") ");
         RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
-        DEBUG(errs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
         if (RetVal) NumDiamonds++;
         break;
       }
@@ -1141,7 +1141,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
       continue;
     if (!TII->PredicateInstruction(I, Cond)) {
 #ifndef NDEBUG
-      errs() << "Unable to predicate " << *I << "!\n";
+      dbgs() << "Unable to predicate " << *I << "!\n";
 #endif
       llvm_unreachable(0);
     }
@@ -1177,7 +1177,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
     if (!isPredicated)
       if (!TII->PredicateInstruction(MI, Cond)) {
 #ifndef NDEBUG
-        errs() << "Unable to predicate " << *I << "!\n";
+        dbgs() << "Unable to predicate " << *I << "!\n";
 #endif
         llvm_unreachable(0);
       }
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 8a3bd0b..9997a48 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -349,12 +349,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
   case Intrinsic::setjmp: {
     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
                                Type::getInt32Ty(Context));
-    if (CI->getType() != Type::getVoidTy(Context))
+    if (!CI->getType()->isVoidTy())
       CI->replaceAllUsesWith(V);
     break;
   }
   case Intrinsic::sigsetjmp:
-     if (CI->getType() != Type::getVoidTy(Context))
+     if (!CI->getType()->isVoidTy())
        CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
      break;
 
@@ -427,10 +427,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;
   }
 
-  case Intrinsic::dbg_stoppoint:
-  case Intrinsic::dbg_region_start:
-  case Intrinsic::dbg_region_end:
-  case Intrinsic::dbg_func_start:
   case Intrinsic::dbg_declare:
     break;    // Simply strip out debugging intrinsics
 
@@ -512,7 +508,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
   }
   case Intrinsic::flt_rounds:
      // Lower to "round to the nearest"
-     if (CI->getType() != Type::getVoidTy(Context))
+     if (!CI->getType()->isVoidTy())
        CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
      break;
   case Intrinsic::invariant_start:
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index d5fd051..2b5fd2c 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Target/TargetRegistry.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
 
@@ -61,6 +62,7 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
     cl::desc("Verify generated machine code"),
     cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
 
+
 // Enable or disable FastISel. Both options are needed, because
 // FastISel is enabled by default with -fast, and we wish to be
 // able to enable or disable fast-isel independently from -O0.
@@ -246,7 +248,7 @@ static void printAndVerify(PassManagerBase &PM,
                            const char *Banner,
                            bool allowDoubleDefs = false) {
   if (PrintMachineCode)
-    PM.add(createMachineFunctionPrinterPass(errs(), Banner));
+    PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
 
   if (VerifyMachineCode)
     PM.add(createMachineVerifierPass(allowDoubleDefs));
@@ -269,7 +271,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None && !DisableLSR) {
     PM.add(createLoopStrengthReducePass(getTargetLowering()));
     if (PrintLSR)
-      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &errs()));
+      PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
   }
 
   // Turn exception handling constructs into something the code generators can
@@ -278,8 +280,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   {
   case ExceptionHandling::SjLj:
     // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
-    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
+    // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
+    // catch info can get misplaced when a selector ends up more than one block
+    // removed from the parent invoke(s). This could happen when a landing
+    // pad is shared by multiple invokes and is also a target of a normal
+    // edge from elsewhere.
     PM.add(createSjLjEHPass(getTargetLowering()));
+    PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
     break;
   case ExceptionHandling::Dwarf:
     PM.add(createDwarfEHPass(getTargetLowering(), OptLevel==CodeGenOpt::None));
@@ -302,7 +309,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (PrintISelInput)
     PM.add(createPrintFunctionPass("\n\n"
                                    "*** Final LLVM Code input to ISel ***\n",
-                                   &errs()));
+                                   &dbgs()));
 
   // Standard Lower-Level Passes.
 
@@ -323,6 +330,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                  /* allowDoubleDefs= */ true);
 
   if (OptLevel != CodeGenOpt::None) {
+    PM.add(createOptimizeExtsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     if (!DisableMachineSink)
@@ -335,7 +343,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   if (OptLevel != CodeGenOpt::None &&
       !DisableTailDuplicate && PreAllocTailDup) {
     PM.add(createTailDuplicatePass(true));
-    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
+                   /* allowDoubleDefs= */ true);
   }
 
   // Run pre-ra passes.
@@ -391,7 +400,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
   PM.add(createGCMachineCodeAnalysisPass());
 
   if (PrintGCInfo)
-    PM.add(createGCInfoPrinter(errs()));
+    PM.add(createGCInfoPrinter(dbgs()));
 
   if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
     PM.add(createCodePlacementOptPass());
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cc286aa..e207f60 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -10,7 +10,7 @@
 // This file implements the LiveRange and LiveInterval classes.  Given some
 // numbering of each the machine instructions an interval [i, j) is said to be a
 // live interval for register v if there is no instruction with number j' > j
-// such that v is live at j' abd there is no instruction with number i' < i such
+// such that v is live at j' and there is no instruction with number i' < i such
 // that v is live at i'. In this implementation intervals can have holes,
 // i.e. an interval might look like [1,20), [50,65), [1000,1001).  Each
 // individual range is represented as an instance of LiveRange, and the whole
@@ -24,6 +24,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
@@ -813,7 +814,7 @@ raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
 }
 
 void LiveRange::dump() const {
-  errs() << *this << "\n";
+  dbgs() << *this << "\n";
 }
 
 void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
@@ -872,7 +873,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
 }
 
 void LiveInterval::dump() const {
-  errs() << *this << "\n";
+  dbgs() << *this << "\n";
 }
 
 
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 452f872..e0e2ec8 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -146,7 +146,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const {
 }
 
 void LiveIntervals::dumpInstrs() const {
-  printInstrs(errs());
+  printInstrs(dbgs());
 }
 
 bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
@@ -253,9 +253,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
 #ifndef NDEBUG
 static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
   if (TargetRegisterInfo::isPhysicalRegister(reg))
-    errs() << tri_->getName(reg);
+    dbgs() << tri_->getName(reg);
   else
-    errs() << "%reg" << reg;
+    dbgs() << "%reg" << reg;
 }
 #endif
 
@@ -266,7 +266,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                              unsigned MOIdx,
                                              LiveInterval &interval) {
   DEBUG({
-      errs() << "\t\tregister: ";
+      dbgs() << "\t\tregister: ";
       printRegName(interval.reg, tri_);
     });
 
@@ -314,7 +314,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                "Shouldn't be alive across any blocks!");
         LiveRange LR(defIndex, killIdx, ValNo);
         interval.addRange(LR);
-        DEBUG(errs() << " +" << LR << "\n");
+        DEBUG(dbgs() << " +" << LR << "\n");
         ValNo->addKill(killIdx);
         return;
       }
@@ -325,7 +325,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     // live into some number of blocks, but gets killed.  Start by adding a
     // range that goes from this definition to the end of the defining block.
     LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
-    DEBUG(errs() << " +" << NewLR);
+    DEBUG(dbgs() << " +" << NewLR);
     interval.addRange(NewLR);
 
     // Iterate over all of the blocks that the variable is completely
@@ -336,7 +336,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
       LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
       interval.addRange(LR);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
 
     // Finally, this virtual register is live from the start of any killing
@@ -348,7 +348,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
       interval.addRange(LR);
       ValNo->addKill(killIdx);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
 
   } else {
@@ -393,7 +393,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
-      DEBUG(errs() << " replace range with " << LR);
+      DEBUG(dbgs() << " replace range with " << LR);
       interval.addRange(LR);
       ValNo->addKill(RedefIndex);
 
@@ -404,8 +404,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
                                     OldValNo));
 
       DEBUG({
-          errs() << " RESULT: ";
-          interval.print(errs(), tri_);
+          dbgs() << " RESULT: ";
+          interval.print(dbgs(), tri_);
         });
     } else {
       // Otherwise, this must be because of phi elimination.  If this is the
@@ -422,8 +422,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
           SlotIndex Start = getMBBStartIdx(Killer->getParent());
           SlotIndex End = getInstructionIndex(Killer).getDefIndex();
           DEBUG({
-              errs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
-              interval.print(errs(), tri_);
+              dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
+              interval.print(dbgs(), tri_);
             });
           interval.removeRange(Start, End);
 
@@ -442,8 +442,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         VNI->addKill(indexes_->getTerminatorGap(killMBB));
         VNI->setHasPHIKill(true);
         DEBUG({
-            errs() << " RESULT: ";
-            interval.print(errs(), tri_);
+            dbgs() << " RESULT: ";
+            interval.print(dbgs(), tri_);
           });
       }
 
@@ -469,11 +469,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       interval.addRange(LR);
       ValNo->addKill(indexes_->getTerminatorGap(mbb));
       ValNo->setHasPHIKill(true);
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
     }
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
 }
 
 void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
@@ -485,7 +485,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
   // A physical register cannot be live across basic block, so its
   // lifetime must end somewhere in its defining basic block.
   DEBUG({
-      errs() << "\t\tregister: ";
+      dbgs() << "\t\tregister: ";
       printRegName(interval.reg, tri_);
     });
 
@@ -502,7 +502,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
   // For earlyclobbers, the defSlot was pushed back one; the extra
   // advance below compensates.
   if (MO.isDead()) {
-    DEBUG(errs() << " dead");
+    DEBUG(dbgs() << " dead");
     end = start.getStoreIndex();
     goto exit;
   }
@@ -517,7 +517,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
       baseIndex = indexes_->getNextNonNullIndex(baseIndex);
 
     if (mi->killsRegister(interval.reg, tri_)) {
-      DEBUG(errs() << " killed");
+      DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
       goto exit;
     } else {
@@ -531,7 +531,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
           // Then the register is essentially dead at the instruction that defines
           // it. Hence its interval is:
           // [defSlot(def), defSlot(def)+1)
-          DEBUG(errs() << " dead");
+          DEBUG(dbgs() << " dead");
           end = start.getStoreIndex();
         }
         goto exit;
@@ -560,7 +560,7 @@ exit:
   LiveRange LR(start, end, ValNo);
   interval.addRange(LR);
   LR.valno->addKill(end);
-  DEBUG(errs() << " +" << LR << '\n');
+  DEBUG(dbgs() << " +" << LR << '\n');
 }
 
 void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
@@ -595,7 +595,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
                                          SlotIndex MIIdx,
                                          LiveInterval &interval, bool isAlias) {
   DEBUG({
-      errs() << "\t\tlivein register: ";
+      dbgs() << "\t\tlivein register: ";
       printRegName(interval.reg, tri_);
     });
 
@@ -612,7 +612,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   
   while (mi != MBB->end()) {
     if (mi->killsRegister(interval.reg, tri_)) {
-      DEBUG(errs() << " killed");
+      DEBUG(dbgs() << " killed");
       end = baseIndex.getDefIndex();
       SeenDefUse = true;
       break;
@@ -621,7 +621,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
       // Then the register is essentially dead at the instruction that defines
       // it. Hence its interval is:
       // [defSlot(def), defSlot(def)+1)
-      DEBUG(errs() << " dead");
+      DEBUG(dbgs() << " dead");
       end = start.getStoreIndex();
       SeenDefUse = true;
       break;
@@ -636,10 +636,10 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
   // Live-in register might not be used at all.
   if (!SeenDefUse) {
     if (isAlias) {
-      DEBUG(errs() << " dead");
+      DEBUG(dbgs() << " dead");
       end = MIIdx.getStoreIndex();
     } else {
-      DEBUG(errs() << " live through");
+      DEBUG(dbgs() << " live through");
       end = baseIndex;
     }
   }
@@ -652,7 +652,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
 
   interval.addRange(LR);
   LR.valno->addKill(end);
-  DEBUG(errs() << " +" << LR << '\n');
+  DEBUG(dbgs() << " +" << LR << '\n');
 }
 
 /// computeIntervals - computes the live intervals for virtual
@@ -660,7 +660,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
 /// live interval is an interval [i, j) where 1 <= i <= j < N for
 /// which a variable is live
 void LiveIntervals::computeIntervals() { 
-  DEBUG(errs() << "********** COMPUTING LIVE INTERVALS **********\n"
+  DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
                << "********** Function: "
                << ((Value*)mf_->getFunction())->getName() << '\n');
 
@@ -670,7 +670,7 @@ void LiveIntervals::computeIntervals() {
     MachineBasicBlock *MBB = MBBI;
     // Track the index of the current machine instr.
     SlotIndex MIIndex = getMBBStartIdx(MBB);
-    DEBUG(errs() << MBB->getName() << ":\n");
+    DEBUG(dbgs() << MBB->getName() << ":\n");
 
     MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
 
@@ -690,7 +690,7 @@ void LiveIntervals::computeIntervals() {
       MIIndex = indexes_->getNextNonNullIndex(MIIndex);
     
     for (; MI != miEnd; ++MI) {
-      DEBUG(errs() << MIIndex << "\t" << *MI);
+      DEBUG(dbgs() << MIIndex << "\t" << *MI);
 
       // Handle defs.
       for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
@@ -1055,7 +1055,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       // If this is the rematerializable definition MI itself and
       // all of its uses are rematerialized, simply delete it.
       if (MI == ReMatOrigDefMI && CanDelete) {
-        DEBUG(errs() << "\t\t\t\tErasing re-materlizable def: "
+        DEBUG(dbgs() << "\t\t\t\tErasing re-materlizable def: "
                      << MI << '\n');
         RemoveMachineInstrFromMaps(MI);
         vrm.RemoveMachineInstrFromMaps(MI);
@@ -1208,28 +1208,28 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       if (CreatedNewVReg) {
         LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
                      nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
       } else {
         // Extend the split live interval to this def / use.
         SlotIndex End = index.getDefIndex();
         LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
                      nI.getValNumInfo(nI.getNumValNums()-1));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
       }
     }
     if (HasDef) {
       LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
                    nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
-      DEBUG(errs() << " +" << LR);
+      DEBUG(dbgs() << " +" << LR);
       nI.addRange(LR);
     }
 
     DEBUG({
-        errs() << "\t\t\t\tAdded new interval: ";
-        nI.print(errs(), tri_);
-        errs() << '\n';
+        dbgs() << "\t\t\t\tAdded new interval: ";
+        nI.print(dbgs(), tri_);
+        dbgs() << '\n';
       });
   }
   return CanFold;
@@ -1557,9 +1557,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
          "attempt to spill already spilled interval!");
 
   DEBUG({
-      errs() << "\t\t\t\tadding intervals for spills for interval: ";
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
       li.dump();
-      errs() << '\n';
+      dbgs() << '\n';
     });
 
   const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
@@ -1610,7 +1610,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
         LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
                      nI.getNextValue(SlotIndex(), 0, false,
                                      getVNInfoAllocator()));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
         vrm.addRestorePoint(NewVReg, MI);
       }
@@ -1618,7 +1618,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
         LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
                      nI.getNextValue(SlotIndex(), 0, false,
                                      getVNInfoAllocator()));
-        DEBUG(errs() << " +" << LR);
+        DEBUG(dbgs() << " +" << LR);
         nI.addRange(LR);
         vrm.addSpillPoint(NewVReg, true, MI);
       }
@@ -1626,9 +1626,9 @@ addIntervalsForSpillsFast(const LiveInterval &li,
       added.push_back(&nI);
         
       DEBUG({
-          errs() << "\t\t\t\tadded new interval: ";
+          dbgs() << "\t\t\t\tadded new interval: ";
           nI.dump();
-          errs() << '\n';
+          dbgs() << '\n';
         });
     }
     
@@ -1651,9 +1651,9 @@ addIntervalsForSpills(const LiveInterval &li,
          "attempt to spill already spilled interval!");
 
   DEBUG({
-      errs() << "\t\t\t\tadding intervals for spills for interval: ";
-      li.print(errs(), tri_);
-      errs() << '\n';
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+      li.print(dbgs(), tri_);
+      dbgs() << '\n';
     });
 
   // Each bit specify whether a spill is required in the MBB.
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 3c88e37..b44a220 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -30,6 +30,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -59,17 +60,17 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
 }
 
 void LiveVariables::VarInfo::dump() const {
-  errs() << "  Alive in blocks: ";
+  dbgs() << "  Alive in blocks: ";
   for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
            E = AliveBlocks.end(); I != E; ++I)
-    errs() << *I << ", ";
-  errs() << "\n  Killed by:";
+    dbgs() << *I << ", ";
+  dbgs() << "\n  Killed by:";
   if (Kills.empty())
-    errs() << " No instructions.\n";
+    dbgs() << " No instructions.\n";
   else {
     for (unsigned i = 0, e = Kills.size(); i != e; ++i)
-      errs() << "\n    #" << i << ": " << *Kills[i];
-    errs() << "\n";
+      dbgs() << "\n    #" << i << ": " << *Kills[i];
+    dbgs() << "\n";
   }
 }
 
@@ -289,7 +290,6 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
 
   MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
   unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
-  MachineInstr *LastPartDef = 0;
   unsigned LastPartDefDist = 0;
   for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
        unsigned SubReg = *SubRegs; ++SubRegs) {
@@ -298,13 +298,9 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
       // There was a def of this sub-register in between. This is a partial
       // def, keep track of the last one.
       unsigned Dist = DistanceMap[Def];
-      if (Dist > LastPartDefDist) {
+      if (Dist > LastPartDefDist)
         LastPartDefDist = Dist;
-        LastPartDef = Def;
-      }
-      continue;
-    }
-    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+    } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
       unsigned Dist = DistanceMap[Use];
       if (Dist > LastRefOrPartRefDist) {
         LastRefOrPartRefDist = Dist;
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 80eb6cd..1121d9b 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -122,7 +122,7 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
          "Extract destination must be in a physical register");
   assert(SrcReg && "invalid subregister index for register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (SrcReg == DstReg) {
     // No need to insert an identity copy instruction.
@@ -131,11 +131,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
       // instruction with KILL.
       MI->setDesc(TII->get(TargetInstrInfo::KILL));
       MI->RemoveOperand(2);     // SubIdx
-      DEBUG(errs() << "subreg: replace by: " << *MI);
+      DEBUG(dbgs() << "subreg: replace by: " << *MI);
       return true;
     }
 
-    DEBUG(errs() << "subreg: eliminated!");
+    DEBUG(dbgs() << "subreg: eliminated!");
   } else {
     // Insert copy
     const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
@@ -150,11 +150,11 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
       TransferKillFlag(MI, SuperReg, TRI, true);
     DEBUG({
         MachineBasicBlock::iterator dMI = MI;
-        errs() << "subreg: " << *(--dMI);
+        dbgs() << "subreg: " << *(--dMI);
       });
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
   MBB->erase(MI);
   return true;
 }
@@ -179,7 +179,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
   assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
          "Inserted value must be in a physical register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (DstSubReg == InsReg && InsSIdx == 0) {
     // No need to insert an identify copy instruction.
@@ -188,7 +188,7 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
     // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
     // The first def is defining RAX, not EAX so the top bits were not
     // zero extended.
-    DEBUG(errs() << "subreg: eliminated!");
+    DEBUG(dbgs() << "subreg: eliminated!");
   } else {
     // Insert sub-register copy
     const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
@@ -203,11 +203,11 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
       TransferKillFlag(MI, InsReg, TRI);
     DEBUG({
         MachineBasicBlock::iterator dMI = MI;
-        errs() << "subreg: " << *(--dMI);
+        dbgs() << "subreg: " << *(--dMI);
       });
   }
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
   MBB->erase(MI);
   return true;
 }
@@ -235,7 +235,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
   assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
          "Inserted value must be in a physical register");
 
-  DEBUG(errs() << "subreg: CONVERTING: " << *MI);
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
   if (DstSubReg == InsReg) {
     // No need to insert an identity copy instruction. If the SrcReg was
@@ -248,7 +248,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
       else
         MIB.addReg(InsReg, RegState::Kill);
     } else {
-      DEBUG(errs() << "subreg: eliminated!\n");
+      DEBUG(dbgs() << "subreg: eliminated!\n");
       MBB->erase(MI);
       return true;
     }
@@ -288,7 +288,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
 
   DEBUG({
       MachineBasicBlock::iterator dMI = MI;
-      errs() << "subreg: " << *(--dMI) << "\n";
+      dbgs() << "subreg: " << *(--dMI) << "\n";
     });
 
   MBB->erase(MI);
@@ -299,7 +299,7 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
 /// copies.
 ///
 bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "Machine Function\n"  
+  DEBUG(dbgs() << "Machine Function\n"  
                << "********** LOWERING SUBREG INSTRS **********\n"
                << "********** Function: " 
                << MF.getFunction()->getName() << '\n');
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp
index 73b15ed..337eab1 100644
--- a/lib/CodeGen/MachOWriter.cpp
+++ b/lib/CodeGen/MachOWriter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetMachOWriterInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/OutputBuffer.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -634,7 +635,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
       }
       case Instruction::Add:
       default:
-        errs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
+        dbgs() << "ConstantExpr not handled as global var init: " << *CE <<"\n";
         llvm_unreachable(0);
       }
     } else if (PC->getType()->isSingleValueType()) {
@@ -732,7 +733,7 @@ void MachOWriter::InitMem(const Constant *C, uintptr_t Offset,
         WorkList.push_back(CPair(CPS->getOperand(i),
                                  PA+SL->getElementOffset(i)));
     } else {
-      errs() << "Bad Type: " << *PC->getType() << "\n";
+      dbgs() << "Bad Type: " << *PC->getType() << "\n";
       llvm_unreachable("Unknown constant type to initialize memory with!");
     }
   }
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 74a0d57..e2ce642 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Target/TargetInstrDesc.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Assembly/Writer.h"
@@ -158,7 +159,7 @@ bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
 }
 
 void MachineBasicBlock::dump() const {
-  print(errs());
+  print(dbgs());
 }
 
 static inline void OutputReg(raw_ostream &os, unsigned RegNo,
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index dd6fd7e..ae9451c 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -299,7 +300,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
 }
 
 void MachineFunction::dump() const {
-  print(errs());
+  print(dbgs());
 }
 
 void MachineFunction::print(raw_ostream &OS) const {
@@ -519,7 +520,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
 }
 
 void MachineFrameInfo::dump(const MachineFunction &MF) const {
-  print(MF, errs());
+  print(MF, dbgs());
 }
 
 //===----------------------------------------------------------------------===//
@@ -579,7 +580,7 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
   OS << '\n';
 }
 
-void MachineJumpTableInfo::dump() const { print(errs()); }
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
 
 
 //===----------------------------------------------------------------------===//
@@ -702,4 +703,4 @@ void MachineConstantPool::print(raw_ostream &OS) const {
   }
 }
 
-void MachineConstantPool::dump() const { print(errs()); }
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a761c2d..cf3e3e1 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -28,11 +28,13 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Metadata.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -277,10 +279,15 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     OS << '>';
     break;
   case MachineOperand::MO_BlockAddress:
-    OS << "<";
+    OS << '<';
     WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
     OS << '>';
     break;
+  case MachineOperand::MO_Metadata:
+    OS << '<';
+    WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+    OS << '>';
+    break;
   default:
     llvm_unreachable("Unrecognized operand type");
   }
@@ -1094,7 +1101,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
 }
 
 void MachineInstr::dump() const {
-  errs() << "  " << *this;
+  dbgs() << "  " << *this;
 }
 
 void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
@@ -1313,3 +1320,12 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
                                        true  /*IsDead*/));
   return true;
 }
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+                                      const TargetRegisterInfo *RegInfo) {
+  MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+  if (!MO || MO->getSubReg())
+    addOperand(MachineOperand::CreateReg(IncomingReg,
+                                         true  /*IsDef*/,
+                                         true  /*IsImp*/));
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 0a57ea1..ffcc8ab 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -161,7 +161,7 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
 /// loop.
 ///
 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "******** Machine LICM ********\n");
+  DEBUG(dbgs() << "******** Machine LICM ********\n");
 
   Changed = FirstInLoop = false;
   MCP = MF.getConstantPool();
@@ -253,28 +253,28 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
   }
 
   DEBUG({
-      errs() << "--- Checking if we can hoist " << I;
+      dbgs() << "--- Checking if we can hoist " << I;
       if (I.getDesc().getImplicitUses()) {
-        errs() << "  * Instruction has implicit uses:\n";
+        dbgs() << "  * Instruction has implicit uses:\n";
 
         const TargetRegisterInfo *TRI = TM->getRegisterInfo();
         for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
              *ImpUses; ++ImpUses)
-          errs() << "      -> " << TRI->getName(*ImpUses) << "\n";
+          dbgs() << "      -> " << TRI->getName(*ImpUses) << "\n";
       }
 
       if (I.getDesc().getImplicitDefs()) {
-        errs() << "  * Instruction has implicit defines:\n";
+        dbgs() << "  * Instruction has implicit defines:\n";
 
         const TargetRegisterInfo *TRI = TM->getRegisterInfo();
         for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
              *ImpDefs; ++ImpDefs)
-          errs() << "      -> " << TRI->getName(*ImpDefs) << "\n";
+          dbgs() << "      -> " << TRI->getName(*ImpDefs) << "\n";
       }
     });
 
   if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
-    DEBUG(errs() << "Cannot hoist with implicit defines or uses\n");
+    DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n");
     return false;
   }
 
@@ -479,7 +479,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
     return false;
 
   if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
-    DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       const MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg() && MO.isDef())
@@ -506,14 +506,14 @@ void MachineLICM::Hoist(MachineInstr *MI) {
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
   DEBUG({
-      errs() << "Hoisting " << *MI;
+      dbgs() << "Hoisting " << *MI;
       if (CurPreheader->getBasicBlock())
-        errs() << " to MachineBasicBlock "
+        dbgs() << " to MachineBasicBlock "
                << CurPreheader->getName();
       if (MI->getParent()->getBasicBlock())
-        errs() << " from MachineBasicBlock "
+        dbgs() << " from MachineBasicBlock "
                << MI->getParent()->getName();
-      errs() << "\n";
+      dbgs() << "\n";
     });
 
   // If this is the first instruction being hoisted to the preheader,
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index d561a5b..269538b 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 namespace llvm {
@@ -73,3 +74,7 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
   }
   return BotMBB;
 }
+
+void MachineLoop::dump() const {
+  print(dbgs());
+}
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 292096f..467ea5d 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -210,7 +210,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
   // If the client wants to know about all new instructions, tell it.
   if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
 
-  DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+  DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
   return InsertedPHI->getOperand(0).getReg();
 }
 
@@ -383,7 +383,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
     InsertedPHI->eraseFromParent();
     InsertedVal = ConstVal;
   } else {
-    DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+    DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
 
     // If the client wants to know about all new instructions, tell it.
     if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index e040738..c177e3c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -90,7 +90,7 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
 }
 
 bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "******** Machine Sinking ********\n");
+  DEBUG(dbgs() << "******** Machine Sinking ********\n");
   
   const TargetMachine &TM = MF.getTarget();
   TII = TM.getInstrInfo();
@@ -255,15 +255,15 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   if (MI->getParent() == SuccToSinkTo)
     return false;
   
-  DEBUG(errs() << "Sink instr " << *MI);
-  DEBUG(errs() << "to block " << *SuccToSinkTo);
+  DEBUG(dbgs() << "Sink instr " << *MI);
+  DEBUG(dbgs() << "to block " << *SuccToSinkTo);
   
   // If the block has multiple predecessors, this would introduce computation on
   // a path that it doesn't already exist.  We could split the critical edge,
   // but for now we just punt.
   // FIXME: Split critical edges if not backedges.
   if (SuccToSinkTo->pred_size() > 1) {
-    DEBUG(errs() << " *** PUNTING: Critical edge found\n");
+    DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
     return false;
   }
   
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0772319..584c21b 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -190,8 +190,7 @@ namespace {
     void report(const char *msg, const MachineOperand *MO, unsigned MONum);
 
     void markReachable(const MachineBasicBlock *MBB);
-    void calcMaxRegsPassed();
-    void calcMinRegsPassed();
+    void calcRegsPassed();
     void checkPHIOps(const MachineBasicBlock *MBB);
 
     void calcRegsRequired();
@@ -710,7 +709,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
 // Calculate the largest possible vregsPassed sets. These are the registers that
 // can pass through an MBB live, but may not be live every time. It is assumed
 // that all vregsPassed sets are empty before the call.
-void MachineVerifier::calcMaxRegsPassed() {
+void MachineVerifier::calcRegsPassed() {
   // First push live-out regs to successors' vregsPassed. Remember the MBBs that
   // have any vregsPassed.
   DenseSet<const MachineBasicBlock*> todo;
@@ -745,45 +744,9 @@ void MachineVerifier::calcMaxRegsPassed() {
   }
 }
 
-// Calculate the minimum vregsPassed set. These are the registers that always
-// pass live through an MBB. The calculation assumes that calcMaxRegsPassed has
-// been called earlier.
-void MachineVerifier::calcMinRegsPassed() {
-  DenseSet<const MachineBasicBlock*> todo;
-  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
-       MFI != MFE; ++MFI)
-    todo.insert(MFI);
-
-  while (!todo.empty()) {
-    const MachineBasicBlock *MBB = *todo.begin();
-    todo.erase(MBB);
-    BBInfo &MInfo = MBBInfoMap[MBB];
-
-    // Remove entries from vRegsPassed that are not live out from all
-    // reachable predecessors.
-    RegSet dead;
-    for (RegSet::iterator I = MInfo.vregsPassed.begin(),
-           E = MInfo.vregsPassed.end(); I != E; ++I) {
-      for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
-             PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
-        BBInfo &PrInfo = MBBInfoMap[*PrI];
-        if (PrInfo.reachable && !PrInfo.isLiveOut(*I)) {
-          dead.insert(*I);
-          break;
-        }
-      }
-    }
-    // If any regs removed, we need to recheck successors.
-    if (!dead.empty()) {
-      set_subtract(MInfo.vregsPassed, dead);
-      todo.insert(MBB->succ_begin(), MBB->succ_end());
-    }
-  }
-}
-
 // Calculate the set of virtual registers that must be passed through each basic
 // block in order to satisfy the requirements of successor blocks. This is very
-// similar to calcMaxRegsPassed, only backwards.
+// similar to calcRegsPassed, only backwards.
 void MachineVerifier::calcRegsRequired() {
   // First push live-in regs to predecessors' vregsRequired.
   DenseSet<const MachineBasicBlock*> todo;
@@ -817,7 +780,7 @@ void MachineVerifier::calcRegsRequired() {
 }
 
 // Check PHI instructions at the beginning of MBB. It is assumed that
-// calcMinRegsPassed has been run so BBInfo::isLiveOut is valid.
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
 void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
   for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
        BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
@@ -848,9 +811,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
 }
 
 void MachineVerifier::visitMachineFunctionAfter() {
-  calcMaxRegsPassed();
+  calcRegsPassed();
 
-  // With the maximal set of vregsPassed we can verify dead-in registers.
   for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
        MFI != MFE; ++MFI) {
     BBInfo &MInfo = MBBInfoMap[MFI];
@@ -859,31 +821,16 @@ void MachineVerifier::visitMachineFunctionAfter() {
     if (!MInfo.reachable)
       continue;
 
-    for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
-           PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
-      BBInfo &PrInfo = MBBInfoMap[*PrI];
-      if (!PrInfo.reachable)
-        continue;
-
-      // Verify physical live-ins. EH landing pads have magic live-ins so we
-      // ignore them.
-      if (!MFI->isLandingPad()) {
-        for (MachineBasicBlock::const_livein_iterator I = MFI->livein_begin(),
-               E = MFI->livein_end(); I != E; ++I) {
-          if (TargetRegisterInfo::isPhysicalRegister(*I) &&
-              !isReserved (*I) && !PrInfo.isLiveOut(*I)) {
-            report("Live-in physical register is not live-out from predecessor",
-                   MFI);
-            *OS << "Register " << TRI->getName(*I)
-                << " is not live-out from BB#" << (*PrI)->getNumber()
-                << ".\n";
-          }
-        }
-      }
+    checkPHIOps(MFI);
 
+    // Verify dead-in virtual registers.
+    if (!allowVirtDoubleDefs) {
+      for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
+             PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
+        BBInfo &PrInfo = MBBInfoMap[*PrI];
+        if (!PrInfo.reachable)
+          continue;
 
-      // Verify dead-in virtual registers.
-      if (!allowVirtDoubleDefs) {
         for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
                E = MInfo.vregsDeadIn.end(); I != E; ++I) {
           // DeadIn register must be in neither regsLiveOut or vregsPassed of
@@ -899,39 +846,6 @@ void MachineVerifier::visitMachineFunctionAfter() {
     }
   }
 
-  calcMinRegsPassed();
-
-  // With the minimal set of vregsPassed we can verify live-in virtual
-  // registers, including PHI instructions.
-  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
-       MFI != MFE; ++MFI) {
-    BBInfo &MInfo = MBBInfoMap[MFI];
-
-    // Skip unreachable MBBs.
-    if (!MInfo.reachable)
-      continue;
-
-    checkPHIOps(MFI);
-
-    for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
-           PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
-      BBInfo &PrInfo = MBBInfoMap[*PrI];
-      if (!PrInfo.reachable)
-        continue;
-
-      for (RegMap::iterator I = MInfo.vregsLiveIn.begin(),
-             E = MInfo.vregsLiveIn.end(); I != E; ++I) {
-        if (!PrInfo.isLiveOut(I->first)) {
-          report("Used virtual register is not live-in", I->second);
-          *OS << "Register %reg" << I->first
-              << " is not live-out from predecessor MBB #"
-              << (*PrI)->getNumber()
-              << ".\n";
-        }
-      }
-    }
-  }
-
   // Now check LiveVariables info if available
   if (LiveVars) {
     calcRegsRequired();
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
new file mode 100644
index 0000000..625ff89
--- /dev/null
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -0,0 +1,185 @@
+//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs optimization of sign / zero extension instructions. It
+// may be extended to handle other instructions of similar property.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This pass will
+// replace (some) uses of the pre-extension value with uses of the sub-register
+// of the results.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ext-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
+                                cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+
+namespace {
+  class OptimizeExts : public MachineFunctionPass {
+    const TargetMachine   *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;   // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    OptimizeExts() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      if (Aggressive) {
+        AU.addRequired<MachineDominatorTree>();
+        AU.addPreserved<MachineDominatorTree>();
+      }
+    }
+
+  private:
+    bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                       SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+  };
+}
+
+char OptimizeExts::ID = 0;
+static RegisterPass<OptimizeExts>
+X("opt-exts", "Optimize sign / zero extensions");
+
+FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
+
+/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify
+/// the source, and if the source value is preserved as a sub-register of
+/// the result, then replace all reachable uses of the source with the subreg
+/// of the result.
+bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+  bool Changed = false;
+  LocalMIs.insert(MI);
+
+  unsigned SrcReg, DstReg, SubIdx;
+  if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
+    if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+        TargetRegisterInfo::isPhysicalRegister(SrcReg))
+      return false;
+
+    MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
+    if (++UI == MRI->use_end())
+      // No other uses.
+      return false;
+
+    // Ok, the source has other uses. See if we can replace the other uses
+    // with use of the result of the extension.
+    SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+    UI = MRI->use_begin(DstReg);
+    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+         ++UI)
+      ReachedBBs.insert(UI->getParent());
+
+    bool ExtendLife = true;
+    // Uses that are in the same BB of uses of the result of the instruction.
+    SmallVector<MachineOperand*, 8> Uses;
+    // Uses that the result of the instruction can reach.
+    SmallVector<MachineOperand*, 8> ExtendedUses;
+
+    UI = MRI->use_begin(SrcReg);
+    for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
+         ++UI) {
+      MachineOperand &UseMO = UI.getOperand();
+      MachineInstr *UseMI = &*UI;
+      if (UseMI == MI)
+        continue;
+      if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+        ExtendLife = false;
+        continue;
+      }
+
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (UseMBB == MBB) {
+        // Local uses that come after the extension.
+        if (!LocalMIs.count(UseMI))
+          Uses.push_back(&UseMO);
+      } else if (ReachedBBs.count(UseMBB))
+        // Non-local uses where the result of extension is used. Always
+        // replace these unless it's a PHI.
+        Uses.push_back(&UseMO);
+      else if (Aggressive && DT->dominates(MBB, UseMBB))
+        // We may want to extend live range of the extension result in order
+        // to replace these uses.
+        ExtendedUses.push_back(&UseMO);
+      else {
+        // Both will be live out of the def MBB anyway. Don't extend live
+        // range of the extension result.
+        ExtendLife = false;
+        break;
+      }
+    }
+
+    if (ExtendLife && !ExtendedUses.empty())
+      // Ok, we'll extend the liveness of the extension result.
+      std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+                std::back_inserter(Uses));
+
+    // Now replace all uses.
+    if (!Uses.empty()) {
+      const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+      for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+        MachineOperand *UseMO = Uses[i];
+        MachineInstr *UseMI = UseMO->getParent();
+        MachineBasicBlock *UseMBB = UseMI->getParent();
+        unsigned NewVR = MRI->createVirtualRegister(RC);
+        BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+                TII->get(TargetInstrInfo::EXTRACT_SUBREG), NewVR)
+          .addReg(DstReg).addImm(SubIdx);
+        UseMO->setReg(NewVR);
+        ++NumReuse;
+        Changed = true;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
+  TM = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+    LocalMIs.clear();
+    for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
+         ++MII) {
+      MachineInstr *MI = &*MII;
+      Changed |= OptimizeInstr(MI, MBB, LocalMIs);
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/CodeGen/PBQP/AnnotatedGraph.h b/lib/CodeGen/PBQP/AnnotatedGraph.h
index a47dce9..738dea0 100644
--- a/lib/CodeGen/PBQP/AnnotatedGraph.h
+++ b/lib/CodeGen/PBQP/AnnotatedGraph.h
@@ -1,4 +1,4 @@
-//===-- AnnotatedGraph.h - Annotated PBQP Graph ----------------*- C++ --*-===//
+//===-- AnnotatedGraph.h - Annotated PBQP Graph -----------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/ExhaustiveSolver.h b/lib/CodeGen/PBQP/ExhaustiveSolver.h
index b2f2e6f..35ec4f1 100644
--- a/lib/CodeGen/PBQP/ExhaustiveSolver.h
+++ b/lib/CodeGen/PBQP/ExhaustiveSolver.h
@@ -1,4 +1,4 @@
-//===-- ExhaustiveSolver.h - Brute Force PBQP Solver -----------*- C++ --*-===//
+//===-- ExhaustiveSolver.h - Brute Force PBQP Solver ------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/GraphBase.h b/lib/CodeGen/PBQP/GraphBase.h
index 0c7493b..becd98a 100644
--- a/lib/CodeGen/PBQP/GraphBase.h
+++ b/lib/CodeGen/PBQP/GraphBase.h
@@ -1,4 +1,4 @@
-//===-- GraphBase.h - Abstract Base PBQP Graph -----------------*- C++ --*-===//
+//===-- GraphBase.h - Abstract Base PBQP Graph ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index 1670877..f78a58a 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -1,4 +1,4 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ --*-===//
+//===-- HeuristicSolver.h - Heuristic PBQP Solver ---------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 3ac9e70..1228f65 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -1,4 +1,4 @@
-//===-- Briggs.h --- Briggs Heuristic for PBQP -----------------*- C++ --*-===//
+//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/PBQPMath.h b/lib/CodeGen/PBQP/PBQPMath.h
index 11f4b4b..20737a2 100644
--- a/lib/CodeGen/PBQP/PBQPMath.h
+++ b/lib/CodeGen/PBQP/PBQPMath.h
@@ -1,4 +1,4 @@
-//===-- PBQPMath.h - PBQP Vector and Matrix classes ------------*- C++ --*-===//
+//===-- PBQPMath.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/SimpleGraph.h b/lib/CodeGen/PBQP/SimpleGraph.h
index 1ca9cae..13e63ce 100644
--- a/lib/CodeGen/PBQP/SimpleGraph.h
+++ b/lib/CodeGen/PBQP/SimpleGraph.h
@@ -1,4 +1,4 @@
-//===-- SimpleGraph.h - Simple PBQP Graph ----------------------*- C++ --*-===//
+//===-- SimpleGraph.h - Simple PBQP Graph -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
index c91e2fa..aee684d 100644
--- a/lib/CodeGen/PBQP/Solution.h
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -1,4 +1,4 @@
-//===-- Solution.h ------- PBQP Solution -----------------------*- C++ --*-===//
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PBQP/Solver.h b/lib/CodeGen/PBQP/Solver.h
index a9c5f83..a445de8 100644
--- a/lib/CodeGen/PBQP/Solver.h
+++ b/lib/CodeGen/PBQP/Solver.h
@@ -1,4 +1,4 @@
-//===-- Solver.h ------- PBQP solver interface -----------------*- C++ --*-===//
+//===-- Solver.h ------- PBQP solver interface ------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 58c3dec..365df30 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -207,7 +207,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
       IncomingReg = entry;
       reusedIncoming = true;
       ++NumReused;
-      DEBUG(errs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+      DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
     } else {
       entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
     }
@@ -234,7 +234,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
       // AfterPHIsIt, so it appears before the current PHICopy.
       if (reusedIncoming)
         if (MachineInstr *OldKill = VI.findKill(&MBB)) {
-          DEBUG(errs() << "Remove old kill from " << *OldKill);
+          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
           LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
           DEBUG(MBB.dump());
         }
@@ -421,7 +421,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
 
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
   MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
-  DEBUG(errs() << "PHIElimination splitting critical edge:"
+  DEBUG(dbgs() << "PHIElimination splitting critical edge:"
         " BB#" << A->getNumber()
         << " -- BB#" << NMBB->getNumber()
         << " -- BB#" << B->getNumber() << '\n');
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 79be295..f43395f 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -233,7 +233,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       TargetSubtarget::ANTIDEP_NONE;
   }
 
-  DEBUG(errs() << "PostRAScheduler\n");
+  DEBUG(dbgs() << "PostRAScheduler\n");
 
   const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
@@ -258,7 +258,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
       static int bbcnt = 0;
       if (bbcnt++ % DebugDiv != DebugMod)
         continue;
-      errs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+      dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
         ":BB#" << MBB->getNumber() << " ***\n";
     }
 #endif
@@ -342,7 +342,7 @@ void SchedulePostRATDList::Schedule() {
     }
   }
 
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
@@ -448,7 +448,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
 /// incorrect by instruction reordering.
 ///
 void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
-  DEBUG(errs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
 
   std::set<unsigned> killedRegs;
   BitVector ReservedRegs = TRI->getReservedRegs(MF);
@@ -511,7 +511,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       }
       
       if (MO.isKill() != kill) {
-        DEBUG(errs() << "Fixing " << MO << " in ");
+        DEBUG(dbgs() << "Fixing " << MO << " in ");
         // Warning: ToggleKillFlag may invalidate MO.
         ToggleKillFlag(MI, MO);
         DEBUG(MI->dump());
@@ -549,9 +549,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -580,7 +580,7 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
 void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
@@ -640,11 +640,11 @@ void SchedulePostRATDList::ListScheduleTopDown() {
         MinDepth = PendingQueue[i]->getDepth();
     }
 
-    DEBUG(errs() << "\n*** Examining Available\n";
+    DEBUG(dbgs() << "\n*** Examining Available\n";
           LatencyPriorityQueue q = AvailableQueue;
           while (!q.empty()) {
             SUnit *su = q.pop();
-            errs() << "Height " << su->getHeight() << ": ";
+            dbgs() << "Height " << su->getHeight() << ": ";
             su->dump(this);
           });
 
@@ -689,19 +689,19 @@ void SchedulePostRATDList::ListScheduleTopDown() {
       }
     } else {
       if (CycleHasInsts) {
-        DEBUG(errs() << "*** Finished cycle " << CurCycle << '\n');
+        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
         HazardRec->AdvanceCycle();
       } else if (!HasNoopHazards) {
         // Otherwise, we have a pipeline stall, but no other problem,
         // just advance the current cycle and try again.
-        DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n');
+        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
         HazardRec->AdvanceCycle();
         ++NumStalls;
       } else {
         // Otherwise, we have no instructions to issue and we have instructions
         // that will fault if we don't do this right.  This is the case for
         // processors without pipeline interlocks and other cases.
-        DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+        DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
         HazardRec->EmitNoop();
         Sequence.push_back(0);   // NULL here means noop
         ++NumNoops;
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 1c5222c..8cbc8c2 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -481,32 +481,21 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
     
     // Search for the use in this block that precedes the instruction we care 
     // about, going to the fallback case if we don't find it.    
-    if (UseI == MBB->begin())
-      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
-                                            Uses, NewVNs, LiveOut, Phis,
-                                            IsTopLevel, IsIntraBlock);
-    
     MachineBasicBlock::iterator Walker = UseI;
-    --Walker;
     bool found = false;
     while (Walker != MBB->begin()) {
+      --Walker;
       if (BlockUses.count(Walker)) {
         found = true;
         break;
       }
-      --Walker;
-    }
-        
-    // Must check begin() too.
-    if (!found) {
-      if (BlockUses.count(Walker))
-        found = true;
-      else
-        return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
-                                              Uses, NewVNs, LiveOut, Phis,
-                                              IsTopLevel, IsIntraBlock);
     }
 
+    if (!found)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
     SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
     UseIndex = UseIndex.getUseIndex();
     SlotIndex EndIndex;
@@ -533,17 +522,11 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
     // This case is basically a merging of the two preceding case, with the
     // special note that checking for defs must take precedence over checking
     // for uses, because of two-address instructions.
-    
-    if (UseI == MBB->begin())
-      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
-                                            NewVNs, LiveOut, Phis,
-                                            IsTopLevel, IsIntraBlock);
-    
     MachineBasicBlock::iterator Walker = UseI;
-    --Walker;
     bool foundDef = false;
     bool foundUse = false;
     while (Walker != MBB->begin()) {
+      --Walker;
       if (BlockDefs.count(Walker)) {
         foundDef = true;
         break;
@@ -551,21 +534,13 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
         foundUse = true;
         break;
       }
-      --Walker;
-    }
-        
-    // Must check begin() too.
-    if (!foundDef && !foundUse) {
-      if (BlockDefs.count(Walker))
-        foundDef = true;
-      else if (BlockUses.count(Walker))
-        foundUse = true;
-      else
-        return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
-                                              Uses, NewVNs, LiveOut, Phis,
-                                              IsTopLevel, IsIntraBlock);
     }
 
+    if (!foundDef && !foundUse)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
     SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
     StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
     SlotIndex EndIndex;
@@ -1022,7 +997,7 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
 /// so it would not cross the barrier that's being processed. Shrink wrap
 /// (minimize) the live interval to the last uses.
 bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
-  DEBUG(errs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+  DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
                << "  result: ");
 
   CurrLI = LI;
@@ -1039,7 +1014,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
 
   // If this would create a new join point, do not split.
   if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
-    DEBUG(errs() << "FAILED (would create a new join point).\n");
+    DEBUG(dbgs() << "FAILED (would create a new join point).\n");
     return false;
   }
 
@@ -1056,13 +1031,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
   MachineBasicBlock::iterator RestorePt =
     findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
   if (RestorePt == BarrierMBB->end()) {
-    DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
+    DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
     return false;
   }
 
   if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
     if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
-      DEBUG(errs() << "success (remat).\n");
+      DEBUG(dbgs() << "success (remat).\n");
       return true;
     }
 
@@ -1081,7 +1056,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
       MachineBasicBlock::iterator SpillPt = 
         findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
       if (SpillPt == BarrierMBB->begin()) {
-        DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+        DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
         return false; // No gap to insert spill.
       }
       // Add spill.
@@ -1096,7 +1071,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
     // If it's already split, just restore the value. There is no need to spill
     // the def again.
     if (!DefMI) {
-      DEBUG(errs() << "FAILED (def is dead).\n");
+      DEBUG(dbgs() << "FAILED (def is dead).\n");
       return false; // Def is dead. Do nothing.
     }
     
@@ -1111,13 +1086,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
         SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
                                  RefsInMBB);
         if (SpillPt == DefMBB->begin()) {
-          DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
           return false; // No gap to insert spill.
         }
       } else {
         SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
         if (SpillPt == DefMBB->end()) {
-          DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
+          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
           return false; // No gap to insert spill.
         }
       }
@@ -1160,7 +1135,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
   }
   
   ++NumSplits;
-  DEBUG(errs() << "success.\n");
+  DEBUG(dbgs() << "success.\n");
   return true;
 }
 
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index c9a33d8..a00f450 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -61,7 +61,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
 /// implicit_def defs and their uses.
 bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
 
-  DEBUG(errs() << "********** PROCESS IMPLICIT DEFS **********\n"
+  DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n"
                << "********** Function: "
                << ((Value*)fn.getFunction())->getName() << '\n');
 
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 9e97d89..8e44a57 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -277,7 +277,7 @@ namespace {
       bool Error = false;
       for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
         if (regUse_[i] != 0) {
-          errs() << tri_->getName(i) << " is still in use!\n";
+          dbgs() << tri_->getName(i) << " is still in use!\n";
           Error = true;
         }
       }
@@ -344,16 +344,16 @@ namespace {
     void printIntervals(const char* const str, ItTy i, ItTy e) const {
       DEBUG({
           if (str)
-            errs() << str << " intervals:\n";
+            dbgs() << str << " intervals:\n";
 
           for (; i != e; ++i) {
-            errs() << "\t" << *i->first << " -> ";
+            dbgs() << "\t" << *i->first << " -> ";
 
             unsigned reg = i->first->reg;
             if (TargetRegisterInfo::isVirtualRegister(reg))
               reg = vrm_->getPhys(reg);
 
-            errs() << tri_->getName(reg) << '\n';
+            dbgs() << tri_->getName(reg) << '\n';
           }
         });
     }
@@ -455,7 +455,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
     return Reg;
 
   // Try to coalesce.
-  DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+  DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
         << '\n');
   vrm_->clearVirt(cur.reg);
   vrm_->assignVirt2Phys(cur.reg, CandReg);
@@ -544,7 +544,7 @@ void RALinScan::initIntervalSets()
 void RALinScan::linearScan() {
   // linear scan algorithm
   DEBUG({
-      errs() << "********** LINEAR SCAN **********\n"
+      dbgs() << "********** LINEAR SCAN **********\n"
              << "********** Function: " 
              << mf_->getFunction()->getName() << '\n';
       printIntervals("fixed", fixed_.begin(), fixed_.end());
@@ -555,7 +555,7 @@ void RALinScan::linearScan() {
     LiveInterval* cur = unhandled_.top();
     unhandled_.pop();
     ++NumIters;
-    DEBUG(errs() << "\n*** CURRENT ***: " << *cur << '\n');
+    DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
 
     assert(!cur->empty() && "Empty interval in unhandled set.");
 
@@ -580,7 +580,7 @@ void RALinScan::linearScan() {
   while (!active_.empty()) {
     IntervalPtr &IP = active_.back();
     unsigned reg = IP.first->reg;
-    DEBUG(errs() << "\tinterval " << *IP.first << " expired\n");
+    DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
     assert(TargetRegisterInfo::isVirtualRegister(reg) &&
            "Can only allocate virtual registers!");
     reg = vrm_->getPhys(reg);
@@ -592,7 +592,7 @@ void RALinScan::linearScan() {
   DEBUG({
       for (IntervalPtrs::reverse_iterator
              i = inactive_.rbegin(); i != inactive_.rend(); ++i)
-        errs() << "\tinterval " << *i->first << " expired\n";
+        dbgs() << "\tinterval " << *i->first << " expired\n";
     });
   inactive_.clear();
 
@@ -628,7 +628,7 @@ void RALinScan::linearScan() {
     }
   }
 
-  DEBUG(errs() << *vrm_);
+  DEBUG(dbgs() << *vrm_);
 
   // Look for physical registers that end up not being allocated even though
   // register allocator had to spill other registers in its register class.
@@ -642,7 +642,7 @@ void RALinScan::linearScan() {
 /// to the inactive list.
 void RALinScan::processActiveIntervals(SlotIndex CurPoint)
 {
-  DEBUG(errs() << "\tprocessing active intervals:\n");
+  DEBUG(dbgs() << "\tprocessing active intervals:\n");
 
   for (unsigned i = 0, e = active_.size(); i != e; ++i) {
     LiveInterval *Interval = active_[i].first;
@@ -652,7 +652,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
     IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
 
     if (IntervalPos == Interval->end()) {     // Remove expired intervals.
-      DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
       assert(TargetRegisterInfo::isVirtualRegister(reg) &&
              "Can only allocate virtual registers!");
       reg = vrm_->getPhys(reg);
@@ -665,7 +665,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
 
     } else if (IntervalPos->start > CurPoint) {
       // Move inactive intervals to inactive list.
-      DEBUG(errs() << "\t\tinterval " << *Interval << " inactive\n");
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
       assert(TargetRegisterInfo::isVirtualRegister(reg) &&
              "Can only allocate virtual registers!");
       reg = vrm_->getPhys(reg);
@@ -688,7 +688,7 @@ void RALinScan::processActiveIntervals(SlotIndex CurPoint)
 /// ones to the active list.
 void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
 {
-  DEBUG(errs() << "\tprocessing inactive intervals:\n");
+  DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
 
   for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
     LiveInterval *Interval = inactive_[i].first;
@@ -698,7 +698,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
     IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
 
     if (IntervalPos == Interval->end()) {       // remove expired intervals.
-      DEBUG(errs() << "\t\tinterval " << *Interval << " expired\n");
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
 
       // Pop off the end of the list.
       inactive_[i] = inactive_.back();
@@ -706,7 +706,7 @@ void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
       --i; --e;
     } else if (IntervalPos->start <= CurPoint) {
       // move re-activated intervals in active list
-      DEBUG(errs() << "\t\tinterval " << *Interval << " active\n");
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
       assert(TargetRegisterInfo::isVirtualRegister(reg) &&
              "Can only allocate virtual registers!");
       reg = vrm_->getPhys(reg);
@@ -834,10 +834,10 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
   SmallVector<LiveInterval*, 8> SLIs[3];
 
   DEBUG({
-      errs() << "\tConsidering " << NumCands << " candidates: ";
+      dbgs() << "\tConsidering " << NumCands << " candidates: ";
       for (unsigned i = 0; i != NumCands; ++i)
-        errs() << tri_->getName(Candidates[i].first) << " ";
-      errs() << "\n";
+        dbgs() << tri_->getName(Candidates[i].first) << " ";
+      dbgs() << "\n";
     });
   
   // Calculate the number of conflicts of each candidate.
@@ -950,7 +950,7 @@ namespace {
 /// assignRegOrStackSlotAtInterval - assign a register if one is available, or
 /// spill.
 void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
-  DEBUG(errs() << "\tallocating current interval: ");
+  DEBUG(dbgs() << "\tallocating current interval: ");
 
   // This is an implicitly defined live interval, just assign any register.
   const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
@@ -958,7 +958,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     unsigned physReg = vrm_->getRegAllocPref(cur->reg);
     if (!physReg)
       physReg = *RC->allocation_order_begin(*mf_);
-    DEBUG(errs() <<  tri_->getName(physReg) << '\n');
+    DEBUG(dbgs() <<  tri_->getName(physReg) << '\n');
     // Note the register is not really in use.
     vrm_->assignVirt2Phys(cur->reg, physReg);
     return;
@@ -1092,7 +1092,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // the free physical register and add this interval to the active
   // list.
   if (physReg) {
-    DEBUG(errs() <<  tri_->getName(physReg) << '\n');
+    DEBUG(dbgs() <<  tri_->getName(physReg) << '\n');
     vrm_->assignVirt2Phys(cur->reg, physReg);
     addRegUse(physReg);
     active_.push_back(std::make_pair(cur, cur->begin()));
@@ -1108,7 +1108,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     }
     return;
   }
-  DEBUG(errs() << "no free registers\n");
+  DEBUG(dbgs() << "no free registers\n");
 
   // Compile the spill weights into an array that is better for scanning.
   std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
@@ -1126,7 +1126,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
   }
  
-  DEBUG(errs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+  DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
 
   // Find a register to spill.
   float minWeight = HUGE_VALF;
@@ -1196,10 +1196,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   }
 
   DEBUG({
-      errs() << "\t\tregister(s) with min weight(s): ";
+      dbgs() << "\t\tregister(s) with min weight(s): ";
 
       for (unsigned i = 0; i != LastCandidate; ++i)
-        errs() << tri_->getName(RegsWeights[i].first)
+        dbgs() << tri_->getName(RegsWeights[i].first)
                << " (" << RegsWeights[i].second << ")\n";
     });
 
@@ -1207,7 +1207,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   // add any added intervals back to unhandled, and restart
   // linearscan.
   if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
-    DEBUG(errs() << "\t\t\tspilling(c): " << *cur << '\n');
+    DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
     SmallVector<LiveInterval*, 8> spillIs;
     std::vector<LiveInterval*> added;
     
@@ -1285,7 +1285,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   while (!spillIs.empty()) {
     LiveInterval *sli = spillIs.back();
     spillIs.pop_back();
-    DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n');
+    DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
     if (sli->beginIndex() < earliestStart)
       earliestStart = sli->beginIndex();
        
@@ -1296,7 +1296,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     spilled.insert(sli->reg);
   }
 
-  DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
+  DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
 
   // Scan handled in reverse order up to the earliest start of a
   // spilled live interval and undo each one, restoring the state of
@@ -1306,7 +1306,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     // If this interval starts before t we are done.
     if (!i->empty() && i->beginIndex() < earliestStart)
       break;
-    DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n');
+    DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
     handled_.pop_back();
 
     // When undoing a live interval allocation we must know if it is active or
@@ -1356,7 +1356,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     LiveInterval *HI = handled_[i];
     if (!HI->expiredAt(earliestStart) &&
         HI->expiredAt(cur->beginIndex())) {
-      DEBUG(errs() << "\t\t\tundo changes for: " << *HI << '\n');
+      DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
       active_.push_back(std::make_pair(HI, HI->begin()));
       assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
       addRegUse(vrm_->getPhys(HI->reg));
@@ -1492,7 +1492,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
   // available first.
   unsigned Preference = vrm_->getRegAllocPref(cur->reg);
   if (Preference) {
-    DEBUG(errs() << "(preferred: " << tri_->getName(Preference) << ") ");
+    DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
     if (isRegAvail(Preference) && 
         RC->contains(Preference))
       return Preference;
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index aea5cff..cbb5826 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -296,11 +296,11 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
   assert(VirtReg && "Spilling a physical register is illegal!"
          " Must not have appropriate kill for the register or use exists beyond"
          " the intended one.");
-  DEBUG(errs() << "  Spilling register " << TRI->getName(PhysReg)
+  DEBUG(dbgs() << "  Spilling register " << TRI->getName(PhysReg)
                << " containing %reg" << VirtReg);
   
   if (!isVirtRegModified(VirtReg)) {
-    DEBUG(errs() << " which has not been modified, so no store necessary!");
+    DEBUG(dbgs() << " which has not been modified, so no store necessary!");
     std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
     if (LastUse.first)
       LastUse.first->getOperand(LastUse.second).setIsKill();
@@ -310,7 +310,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
     // modified.
     const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
     int FrameIndex = getStackSpaceFor(VirtReg, RC);
-    DEBUG(errs() << " to stack slot #" << FrameIndex);
+    DEBUG(dbgs() << " to stack slot #" << FrameIndex);
     // If the instruction reads the register that's spilled, (e.g. this can
     // happen if it is a move to a physical register), then the spill
     // instruction is not a kill.
@@ -321,7 +321,7 @@ void RALocal::spillVirtReg(MachineBasicBlock &MBB,
 
   getVirt2PhysRegMapSlot(VirtReg) = 0;   // VirtReg no longer available
 
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
   removePhysReg(PhysReg);
 }
 
@@ -516,7 +516,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
 
   markVirtRegModified(VirtReg, false);   // Note that this reg was just reloaded
 
-  DEBUG(errs() << "  Reloading %reg" << VirtReg << " into "
+  DEBUG(dbgs() << "  Reloading %reg" << VirtReg << " into "
                << TRI->getName(PhysReg) << "\n");
 
   // Add move instruction(s)
@@ -725,7 +725,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
   DEBUG({
       const BasicBlock *LBB = MBB.getBasicBlock();
       if (LBB)
-        errs() << "\nStarting RegAlloc of BB: " << LBB->getName();
+        dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
     });
 
   // Add live-in registers as active.
@@ -752,13 +752,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
     MachineInstr *MI = MII++;
     const TargetInstrDesc &TID = MI->getDesc();
     DEBUG({
-        errs() << "\nStarting RegAlloc of: " << *MI;
-        errs() << "  Regs have values: ";
+        dbgs() << "\nStarting RegAlloc of: " << *MI;
+        dbgs() << "  Regs have values: ";
         for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
           if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
-            errs() << "[" << TRI->getName(i)
+            dbgs() << "[" << TRI->getName(i)
                    << ",%reg" << PhysRegsUsed[i] << "] ";
-        errs() << '\n';
+        dbgs() << '\n';
       });
 
     // Determine whether this is a copy instruction.  The cases where the
@@ -809,7 +809,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
             markVirtRegModified(DestVirtReg);
             getVirtRegLastUse(DestVirtReg) =
                    std::make_pair((MachineInstr*)0, 0);
-            DEBUG(errs() << "  Assigning " << TRI->getName(DestPhysReg)
+            DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
                          << " to %reg" << DestVirtReg << "\n");
             MO.setReg(DestPhysReg);  // Assign the earlyclobber register
           } else {
@@ -876,13 +876,13 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
       }
 
       if (PhysReg) {
-        DEBUG(errs() << "  Last use of " << TRI->getName(PhysReg)
+        DEBUG(dbgs() << "  Last use of " << TRI->getName(PhysReg)
                      << "[%reg" << VirtReg <<"], removing it from live set\n");
         removePhysReg(PhysReg);
         for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
              *SubRegs; ++SubRegs) {
           if (PhysRegsUsed[*SubRegs] != -2) {
-            DEBUG(errs()  << "  Last use of "
+            DEBUG(dbgs()  << "  Last use of "
                           << TRI->getName(*SubRegs) << "[%reg" << VirtReg
                           <<"], removing it from live set\n");
             removePhysReg(*SubRegs);
@@ -978,7 +978,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
         MF->getRegInfo().setPhysRegUsed(DestPhysReg);
         markVirtRegModified(DestVirtReg);
         getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
-        DEBUG(errs() << "  Assigning " << TRI->getName(DestPhysReg)
+        DEBUG(dbgs() << "  Assigning " << TRI->getName(DestPhysReg)
                      << " to %reg" << DestVirtReg << "\n");
         MO.setReg(DestPhysReg);  // Assign the output register
       }
@@ -1001,14 +1001,14 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
       }
 
       if (PhysReg) {
-        DEBUG(errs()  << "  Register " << TRI->getName(PhysReg)
+        DEBUG(dbgs()  << "  Register " << TRI->getName(PhysReg)
                       << " [%reg" << VirtReg
                       << "] is never used, removing it from live set\n");
         removePhysReg(PhysReg);
         for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
              *AliasSet; ++AliasSet) {
           if (PhysRegsUsed[*AliasSet] != -2) {
-            DEBUG(errs()  << "  Register " << TRI->getName(*AliasSet)
+            DEBUG(dbgs()  << "  Register " << TRI->getName(*AliasSet)
                           << " [%reg" << *AliasSet
                           << "] is never used, removing it from live set\n");
             removePhysReg(*AliasSet);
@@ -1058,7 +1058,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
 /// runOnMachineFunction - Register allocate the whole function
 ///
 bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
-  DEBUG(errs() << "Machine Function\n");
+  DEBUG(dbgs() << "Machine Function\n");
   MF = &Fn;
   TM = &Fn.getTarget();
   TRI = TM->getRegisterInfo();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c2014a7..fc59653 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -717,7 +717,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
       // Get the physical reg, subtracting 1 to account for the spill option.
       unsigned physReg = allowedSets[node][allocSelection - 1];
 
-      DEBUG(errs() << "VREG " << virtReg << " -> "
+      DEBUG(dbgs() << "VREG " << virtReg << " -> "
                    << tri->getName(physReg) << "\n");
 
       assert(physReg != 0);
@@ -741,7 +741,7 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
       addStackInterval(spillInterval, mri);
 
       (void) oldSpillWeight;
-      DEBUG(errs() << "VREG " << virtReg << " -> SPILLED (Cost: "
+      DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
                    << oldSpillWeight << ", New vregs: ");
 
       // Copy any newly inserted live intervals into the list of regs to
@@ -752,12 +752,12 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
 
         assert(!(*itr)->empty() && "Empty spill range.");
 
-        DEBUG(errs() << (*itr)->reg << " ");
+        DEBUG(dbgs() << (*itr)->reg << " ");
 
         vregIntervalsToAlloc.insert(*itr);
       }
 
-      DEBUG(errs() << ")\n");
+      DEBUG(dbgs() << ")\n");
 
       // We need another round if spill intervals were added.
       anotherRoundNeeded |= !newSpills.empty();
@@ -849,7 +849,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
 
   vrm = &getAnalysis<VirtRegMap>();
 
-  DEBUG(errs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
+  DEBUG(dbgs() << "PBQP2 Register Allocating for " << mf->getFunction()->getName() << "\n");
 
   // Allocator main loop:
   //
@@ -874,7 +874,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
     unsigned round = 0;
 
     while (!pbqpAllocComplete) {
-      DEBUG(errs() << "  PBQP Regalloc round " << round << ":\n");
+      DEBUG(dbgs() << "  PBQP Regalloc round " << round << ":\n");
 
       PBQP::SimpleGraph problem = constructPBQPProblem();
       PBQP::HeuristicSolver<PBQP::Heuristics::Briggs> solver;
@@ -896,7 +896,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
   node2LI.clear();
   allowedSets.clear();
 
-  DEBUG(errs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
+  DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
 
   // Run rewriter
   std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 71693d2..1f3e295 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -41,7 +41,7 @@ void ScheduleDAG::dumpSchedule() const {
     if (SUnit *SU = Sequence[i])
       SU->dump(this);
     else
-      errs() << "**** NOOP ****\n";
+      dbgs() << "**** NOOP ****\n";
   }
 }
 
@@ -61,9 +61,9 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
   Schedule();
 
   DEBUG({
-      errs() << "*** Final schedule ***\n";
+      dbgs() << "*** Final schedule ***\n";
       dumpSchedule();
-      errs() << '\n';
+      dbgs() << '\n';
     });
 }
 
@@ -271,58 +271,58 @@ void SUnit::ComputeHeight() {
 /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or
 /// a group of nodes flagged together.
 void SUnit::dump(const ScheduleDAG *G) const {
-  errs() << "SU(" << NodeNum << "): ";
+  dbgs() << "SU(" << NodeNum << "): ";
   G->dumpNode(this);
 }
 
 void SUnit::dumpAll(const ScheduleDAG *G) const {
   dump(G);
 
-  errs() << "  # preds left       : " << NumPredsLeft << "\n";
-  errs() << "  # succs left       : " << NumSuccsLeft << "\n";
-  errs() << "  Latency            : " << Latency << "\n";
-  errs() << "  Depth              : " << Depth << "\n";
-  errs() << "  Height             : " << Height << "\n";
+  dbgs() << "  # preds left       : " << NumPredsLeft << "\n";
+  dbgs() << "  # succs left       : " << NumSuccsLeft << "\n";
+  dbgs() << "  Latency            : " << Latency << "\n";
+  dbgs() << "  Depth              : " << Depth << "\n";
+  dbgs() << "  Height             : " << Height << "\n";
 
   if (Preds.size() != 0) {
-    errs() << "  Predecessors:\n";
+    dbgs() << "  Predecessors:\n";
     for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end();
          I != E; ++I) {
-      errs() << "   ";
+      dbgs() << "   ";
       switch (I->getKind()) {
-      case SDep::Data:        errs() << "val "; break;
-      case SDep::Anti:        errs() << "anti"; break;
-      case SDep::Output:      errs() << "out "; break;
-      case SDep::Order:       errs() << "ch  "; break;
+      case SDep::Data:        dbgs() << "val "; break;
+      case SDep::Anti:        dbgs() << "anti"; break;
+      case SDep::Output:      dbgs() << "out "; break;
+      case SDep::Order:       dbgs() << "ch  "; break;
       }
-      errs() << "#";
-      errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      dbgs() << "#";
+      dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
       if (I->isArtificial())
-        errs() << " *";
-      errs() << ": Latency=" << I->getLatency();
-      errs() << "\n";
+        dbgs() << " *";
+      dbgs() << ": Latency=" << I->getLatency();
+      dbgs() << "\n";
     }
   }
   if (Succs.size() != 0) {
-    errs() << "  Successors:\n";
+    dbgs() << "  Successors:\n";
     for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end();
          I != E; ++I) {
-      errs() << "   ";
+      dbgs() << "   ";
       switch (I->getKind()) {
-      case SDep::Data:        errs() << "val "; break;
-      case SDep::Anti:        errs() << "anti"; break;
-      case SDep::Output:      errs() << "out "; break;
-      case SDep::Order:       errs() << "ch  "; break;
+      case SDep::Data:        dbgs() << "val "; break;
+      case SDep::Anti:        dbgs() << "anti"; break;
+      case SDep::Output:      dbgs() << "out "; break;
+      case SDep::Order:       dbgs() << "ch  "; break;
       }
-      errs() << "#";
-      errs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+      dbgs() << "#";
+      dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
       if (I->isArtificial())
-        errs() << " *";
-      errs() << ": Latency=" << I->getLatency();
-      errs() << "\n";
+        dbgs() << " *";
+      dbgs() << ": Latency=" << I->getLatency();
+      dbgs() << "\n";
     }
   }
-  errs() << "\n";
+  dbgs() << "\n";
 }
 
 #ifndef NDEBUG
@@ -340,35 +340,35 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
         continue;
       }
       if (!AnyNotSched)
-        errs() << "*** Scheduling failed! ***\n";
+        dbgs() << "*** Scheduling failed! ***\n";
       SUnits[i].dump(this);
-      errs() << "has not been scheduled!\n";
+      dbgs() << "has not been scheduled!\n";
       AnyNotSched = true;
     }
     if (SUnits[i].isScheduled &&
         (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) >
           unsigned(INT_MAX)) {
       if (!AnyNotSched)
-        errs() << "*** Scheduling failed! ***\n";
+        dbgs() << "*** Scheduling failed! ***\n";
       SUnits[i].dump(this);
-      errs() << "has an unexpected "
+      dbgs() << "has an unexpected "
            << (isBottomUp ? "Height" : "Depth") << " value!\n";
       AnyNotSched = true;
     }
     if (isBottomUp) {
       if (SUnits[i].NumSuccsLeft != 0) {
         if (!AnyNotSched)
-          errs() << "*** Scheduling failed! ***\n";
+          dbgs() << "*** Scheduling failed! ***\n";
         SUnits[i].dump(this);
-        errs() << "has successors left!\n";
+        dbgs() << "has successors left!\n";
         AnyNotSched = true;
       }
     } else {
       if (SUnits[i].NumPredsLeft != 0) {
         if (!AnyNotSched)
-          errs() << "*** Scheduling failed! ***\n";
+          dbgs() << "*** Scheduling failed! ***\n";
         SUnits[i].dump(this);
-        errs() << "has predecessors left!\n";
+        dbgs() << "has predecessors left!\n";
         AnyNotSched = true;
       }
     }
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index 38839c4..4e6c1fc 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -69,7 +70,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Formal argument #" << i << " has unhandled type "
+      dbgs() << "Formal argument #" << i << " has unhandled type "
              << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -102,7 +103,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Return operand #" << i << " has unhandled type "
+      dbgs() << "Return operand #" << i << " has unhandled type "
              << VT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -121,7 +122,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call operand #" << i << " has unhandled type "
+      dbgs() << "Call operand #" << i << " has unhandled type "
              << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -140,7 +141,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
     ISD::ArgFlagsTy ArgFlags = Flags[i];
     if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call operand #" << i << " has unhandled type "
+      dbgs() << "Call operand #" << i << " has unhandled type "
              << ArgVT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -157,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
     ISD::ArgFlagsTy Flags = Ins[i].Flags;
     if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
 #ifndef NDEBUG
-      errs() << "Call result #" << i << " has unhandled type "
+      dbgs() << "Call result #" << i << " has unhandled type "
              << VT.getEVTString();
 #endif
       llvm_unreachable(0);
@@ -170,7 +171,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
 void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
   if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
 #ifndef NDEBUG
-    errs() << "Call result has unhandled type "
+    dbgs() << "Call result has unhandled type "
            << VT.getEVTString();
 #endif
     llvm_unreachable(0);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e6aa14c..549527c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -541,11 +541,11 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                                bool AddTo) {
   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.1 ";
+  DEBUG(dbgs() << "\nReplacing.1 ";
         N->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         To[0].getNode()->dump(&DAG);
-        errs() << " and " << NumTo-1 << " other values\n";
+        dbgs() << " and " << NumTo-1 << " other values\n";
         for (unsigned i = 0, e = NumTo; i != e; ++i)
           assert((!To[i].getNode() ||
                   N->getValueType(i) == To[i].getValueType()) &&
@@ -619,11 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
 
   // Replace the old value with the new one.
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.2 "; 
+  DEBUG(dbgs() << "\nReplacing.2 "; 
         TLO.Old.getNode()->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         TLO.New.getNode()->dump(&DAG);
-        errs() << '\n');
+        dbgs() << '\n');
 
   CommitTargetLoweringOpt(TLO);
   return true;
@@ -689,11 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
            "Node was deleted but visit returned new node!");
 
-    DEBUG(errs() << "\nReplacing.3 "; 
+    DEBUG(dbgs() << "\nReplacing.3 "; 
           N->dump(&DAG);
-          errs() << "\nWith: ";
+          dbgs() << "\nWith: ";
           RV.getNode()->dump(&DAG);
-          errs() << '\n');
+          dbgs() << '\n');
     WorkListRemover DeadNodes(*this);
     if (N->getNumValues() == RV.getNode()->getNumValues())
       DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -1684,22 +1684,25 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   EVT VT = N0.getValueType();
   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
 
+  // Bail early if none of these transforms apply.
+  if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
   // For each of OP in AND/OR/XOR:
   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
   //
   // do not sink logical op inside of a vector extend, since it may combine
   // into a vsetcc.
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
-       (N0.getOpcode() == ISD::TRUNCATE &&
-        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
       !VT.isVector() &&
-      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
-      (!LegalOperations ||
-       TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
     SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                  N0.getOperand(0).getValueType(),
                                  N0.getOperand(0), N1.getOperand(0));
@@ -1839,6 +1842,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (!VT.isVector() &&
       SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
+
   // fold (zext_inreg (extload x)) -> (zextload x)
   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -1885,48 +1889,69 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
 
   // fold (and (load x), 255) -> (zextload x, i8)
   // fold (and (extload x, i16), 255) -> (zextload x, i8)
-  if (N1C && N0.getOpcode() == ISD::LOAD) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() &&
-        // Do not change the width of a volatile load.
-        !LN0->isVolatile()) {
-      EVT ExtVT = MVT::Other;
+        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
       uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
-      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
-        ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
-
-      EVT LoadedVT = LN0->getMemoryVT();
-
-      // Do not generate loads of non-round integer types since these can
-      // be expensive (and would be wrong if the type is not byte sized).
-      if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
-          (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
-        EVT PtrType = N0.getOperand(1).getValueType();
-
-        // For big endian targets, we need to add an offset to the pointer to
-        // load the correct bytes.  For little endian systems, we merely need to
-        // read fewer bytes from the same pointer.
-        unsigned LVTStoreBytes = LoadedVT.getStoreSize();
-        unsigned EVTStoreBytes = ExtVT.getStoreSize();
-        unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-        unsigned Alignment = LN0->getAlignment();
-        SDValue NewPtr = LN0->getBasePtr();
-
-        if (TLI.isBigEndian()) {
-          NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
-                               NewPtr, DAG.getConstant(PtrOff, PtrType));
-          Alignment = MinAlign(Alignment, PtrOff);
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          
+          SDValue NewLoad = 
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(),
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), LN0->getAlignment());
+          AddToWorkList(N);
+          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
         }
+        
+        // Do not change the width of a volatile load.
+        // Do not generate loads of non-round integer types since these can
+        // be expensive (and would be wrong if the type is not byte sized).
+        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT PtrType = LN0->getOperand(1).getValueType();
+
+          unsigned Alignment = LN0->getAlignment();
+          SDValue NewPtr = LN0->getBasePtr();
+
+          // For big endian targets, we need to add an offset to the pointer
+          // to load the correct bytes.  For little endian systems, we merely
+          // need to read fewer bytes from the same pointer.
+          if (TLI.isBigEndian()) {
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
+            Alignment = MinAlign(Alignment, PtrOff);
+          }
 
-        AddToWorkList(NewPtr.getNode());
-        SDValue Load =
-          DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
-                         NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                         ExtVT, LN0->isVolatile(), Alignment);
-        AddToWorkList(N);
-        CombineTo(N0.getNode(), Load, Load.getValue(1));
-        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          AddToWorkList(NewPtr.getNode());
+          
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          SDValue Load =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), NewPtr,
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), Alignment);
+          AddToWorkList(N);
+          CombineTo(LN0, Load, Load.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
       }
     }
   }
@@ -2555,10 +2580,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // sext_inreg.
   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
-    EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
-    if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
-                         N0.getOperand(0), DAG.getValueType(EVT));
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
   }
 
   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
@@ -2778,9 +2807,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   // However when after the source operand of SRL is optimized into AND, the SRL
   // itself may not be optimized further. Look for it and add the BRCOND into
   // the worklist.
-  if (N->hasOneUse() &&
-      N->use_begin()->getOpcode() == ISD::BRCOND)
-    AddToWorkList(*N->use_begin());
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look pass the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
 
   return SDValue();
 }
@@ -3034,9 +3071,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // See if the value being truncated is already sign extended.  If so, just
     // eliminate the trunc/sext pair.
     SDValue Op = N0.getOperand(0);
-    unsigned OpBits   = Op.getValueType().getSizeInBits();
-    unsigned MidBits  = N0.getValueType().getSizeInBits();
-    unsigned DestBits = VT.getSizeInBits();
+    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
 
     if (OpBits == DestBits) {
@@ -3059,12 +3096,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
     // fold (sext (truncate x)) -> (sextinreg x).
     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                  N0.getValueType())) {
-      if (Op.getValueType().bitsLT(VT))
+      if (OpBits < DestBits)
         Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
-      else if (Op.getValueType().bitsGT(VT))
+      else if (OpBits > DestBits)
         Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
-                         DAG.getValueType(N0.getValueType().getScalarType()));
+                         DAG.getValueType(N0.getValueType()));
     }
   }
 
@@ -3198,7 +3235,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
 
   // fold (zext (truncate x)) -> (and x, mask)
   if (N0.getOpcode() == ISD::TRUNCATE &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
     SDValue Op = N0.getOperand(0);
     if (Op.getValueType().bitsLT(VT)) {
       Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3322,7 +3362,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     DebugLoc dl = N->getDebugLoc();
     return DAG.getNode(N0.getOpcode(), dl, VT,
                        DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
-                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+                       DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                   N0.getOperand(1).getValueType(),
+                                   N0.getOperand(1)));
   }
 
   return SDValue();
@@ -3512,7 +3554,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   if (VT.isVector())
     return SDValue();
 
-  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
   // extended to VT.
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
@@ -3586,7 +3628,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EVT = cast<VTSDNode>(N1)->getVT();
   unsigned VTBits = VT.getScalarType().getSizeInBits();
-  unsigned EVTBits = EVT.getSizeInBits();
+  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
 
   // fold (sext_in_reg c1) -> c1
   if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
@@ -3702,7 +3744,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
       return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
     else
       // if the source and dest are the same type, we can drop both the extend
-      // and the truncate
+      // and the truncate.
       return N0.getOperand(0);
   }
 
@@ -4513,6 +4555,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                        N1.getOperand(0), N1.getOperand(1), N2);
   }
 
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look pass truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
   if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
     // Match this pattern so that we can generate simpler code:
     //
@@ -4524,7 +4573,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     // into
     // 
     //   %a = ...
-    //   %b = and %a, 2
+    //   %b = and i32 %a, 2
     //   %c = setcc eq %b, 0
     //   brcond %c ...
     //
@@ -4535,7 +4584,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
     SDValue Op1 = N1.getOperand(1);
 
     if (Op0.getOpcode() == ISD::AND &&
-        Op0.hasOneUse() &&
         Op1.getOpcode() == ISD::Constant) {
       SDValue AndOp1 = Op0.getOperand(1);
 
@@ -4550,12 +4598,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                          Op0, DAG.getConstant(0, Op0.getValueType()),
                          ISD::SETNE);
 
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
           // Replace the uses of SRL with SETCC
           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
           removeFromWorkList(N1.getNode());
           DAG.DeleteNode(N1.getNode());
-          return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
-                             MVT::Other, Chain, SetCC, N2);
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
         }
       }
     }
@@ -4692,11 +4749,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
                                  BasePtr, Offset, AM);
   ++PreIndexedNodes;
   ++NodesCombined;
-  DEBUG(errs() << "\nReplacing.4 ";
+  DEBUG(dbgs() << "\nReplacing.4 ";
         N->dump(&DAG);
-        errs() << "\nWith: ";
+        dbgs() << "\nWith: ";
         Result.getNode()->dump(&DAG);
-        errs() << '\n');
+        dbgs() << '\n');
   WorkListRemover DeadNodes(*this);
   if (isLoad) {
     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4826,11 +4883,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
                                 BasePtr, Offset, AM);
         ++PostIndexedNodes;
         ++NodesCombined;
-        DEBUG(errs() << "\nReplacing.5 ";
+        DEBUG(dbgs() << "\nReplacing.5 ";
               N->dump(&DAG);
-              errs() << "\nWith: ";
+              dbgs() << "\nWith: ";
               Result.getNode()->dump(&DAG);
-              errs() << '\n');
+              dbgs() << '\n');
         WorkListRemover DeadNodes(*this);
         if (isLoad) {
           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4889,11 +4946,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
         // v3         = add v2, c
         // Now we replace use of chain2 with chain1.  This makes the second load
         // isomorphic to the one we are deleting, and thus makes this load live.
-        DEBUG(errs() << "\nReplacing.6 ";
+        DEBUG(dbgs() << "\nReplacing.6 ";
               N->dump(&DAG);
-              errs() << "\nWith chain: ";
+              dbgs() << "\nWith chain: ";
               Chain.getNode()->dump(&DAG);
-              errs() << "\n");
+              dbgs() << "\n");
         WorkListRemover DeadNodes(*this);
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
 
@@ -4909,11 +4966,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
       if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
-        DEBUG(errs() << "\nReplacing.6 ";
+        DEBUG(dbgs() << "\nReplacing.6 ";
               N->dump(&DAG);
-              errs() << "\nWith: ";
+              dbgs() << "\nWith: ";
               Undef.getNode()->dump(&DAG);
-              errs() << " and 2 other values\n");
+              dbgs() << " and 2 other values\n");
         WorkListRemover DeadNodes(*this);
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
@@ -5738,35 +5795,48 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
       if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
         // FIXME: this discards src value information.  This is
         // over-conservative. It would be beneficial to be able to remember
-        // both potential memory locations.
+        // both potential memory locations.  Since we are discarding
+        // src value info, don't do the transformation if the memory
+        // locations are not in the default address space.
+        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+            LLDAddrSpace = PT->getAddressSpace();
+        }
+        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+            RLDAddrSpace = PT->getAddressSpace();
+        }
         SDValue Addr;
-        if (TheSelect->getOpcode() == ISD::SELECT) {
-          // Check that the condition doesn't reach either load.  If so, folding
-          // this will induce a cycle into the DAG.
-          if ((!LLD->hasAnyUseOfValue(1) ||
-               !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
-              (!RLD->hasAnyUseOfValue(1) ||
-               !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
-            Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
-                               LLD->getBasePtr().getValueType(),
-                               TheSelect->getOperand(0), LLD->getBasePtr(),
-                               RLD->getBasePtr());
-          }
-        } else {
-          // Check that the condition doesn't reach either load.  If so, folding
-          // this will induce a cycle into the DAG.
-          if ((!LLD->hasAnyUseOfValue(1) ||
-               (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
-              (!RLD->hasAnyUseOfValue(1) ||
-               (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-                !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
-            Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
-                               LLD->getBasePtr().getValueType(),
-                               TheSelect->getOperand(0),
-                               TheSelect->getOperand(1),
-                               LLD->getBasePtr(), RLD->getBasePtr(),
-                               TheSelect->getOperand(4));
+        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+          if (TheSelect->getOpcode() == ISD::SELECT) {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0), LLD->getBasePtr(),
+                                 RLD->getBasePtr());
+            }
+          } else {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0),
+                                 TheSelect->getOperand(1),
+                                 LLD->getBasePtr(), RLD->getBasePtr(),
+                                 TheSelect->getOperand(4));
+            }
           }
         }
 
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 33694f2..09fd657 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -78,7 +78,7 @@ unsigned FastISel::getRegForValue(Value *V) {
   // Look up the value to see if we already have a register for it. We
   // cache values defined by Instructions across blocks, and other values
   // only locally. This is because Instructions already have the SSA
-  // def-dominatess-use requirement enforced.
+  // def-dominates-use requirement enforced.
   if (ValueMap.count(V))
     return ValueMap[V];
   unsigned Reg = LocalValueMap[V];
@@ -188,7 +188,7 @@ unsigned FastISel::getRegForGEPIndex(Value *Idx) {
 /// SelectBinaryOp - Select and emit code for a binary operator instruction,
 /// which has an opcode which directly corresponds to the given ISD opcode.
 ///
-bool FastISel::SelectBinaryOp(User *I, ISD::NodeType ISDOpcode) {
+bool FastISel::SelectBinaryOp(User *I, unsigned ISDOpcode) {
   EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
   if (VT == MVT::Other || !VT.isSimple())
     // Unhandled type. Halt "fast" selection and bail.
@@ -325,12 +325,6 @@ bool FastISel::SelectCall(User *I) {
   unsigned IID = F->getIntrinsicID();
   switch (IID) {
   default: break;
-  case Intrinsic::dbg_stoppoint: 
-  case Intrinsic::dbg_region_start: 
-  case Intrinsic::dbg_region_end: 
-  case Intrinsic::dbg_func_start:
-    // FIXME - Remove this instructions once the dust settles.
-    return true;
   case Intrinsic::dbg_declare: {
     DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
     if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None)||!DW
@@ -338,8 +332,6 @@ bool FastISel::SelectCall(User *I) {
       return true;
 
     Value *Address = DI->getAddress();
-    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
-      Address = BCI->getOperand(0);
     AllocaInst *AI = dyn_cast<AllocaInst>(Address);
     // Don't handle byval struct arguments or VLAs, for example.
     if (!AI) break;
@@ -424,7 +416,7 @@ bool FastISel::SelectCall(User *I) {
   return false;
 }
 
-bool FastISel::SelectCast(User *I, ISD::NodeType Opcode) {
+bool FastISel::SelectCast(User *I, unsigned Opcode) {
   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   EVT DstVT = TLI.getValueType(I->getType());
     
@@ -742,44 +734,44 @@ FastISel::FastISel(MachineFunction &mf,
 FastISel::~FastISel() {}
 
 unsigned FastISel::FastEmit_(MVT, MVT,
-                             ISD::NodeType) {
+                             unsigned) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_r(MVT, MVT,
-                              ISD::NodeType, unsigned /*Op0*/) {
+                              unsigned, unsigned /*Op0*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rr(MVT, MVT, 
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                unsigned /*Op0*/) {
   return 0;
 }
 
-unsigned FastISel::FastEmit_i(MVT, MVT, ISD::NodeType, uint64_t /*Imm*/) {
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_f(MVT, MVT,
-                              ISD::NodeType, ConstantFP * /*FPImm*/) {
+                              unsigned, ConstantFP * /*FPImm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_ri(MVT, MVT,
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                uint64_t /*Imm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rf(MVT, MVT,
-                               ISD::NodeType, unsigned /*Op0*/,
+                               unsigned, unsigned /*Op0*/,
                                ConstantFP * /*FPImm*/) {
   return 0;
 }
 
 unsigned FastISel::FastEmit_rri(MVT, MVT,
-                                ISD::NodeType,
+                                unsigned,
                                 unsigned /*Op0*/, unsigned /*Op1*/,
                                 uint64_t /*Imm*/) {
   return 0;
@@ -789,7 +781,7 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
 /// to emit an instruction with an immediate operand using FastEmit_ri.
 /// If that fails, it materializes the immediate into a register and try
 /// FastEmit_rr instead.
-unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
                                 unsigned Op0, uint64_t Imm,
                                 MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the ri form.
@@ -806,7 +798,7 @@ unsigned FastISel::FastEmit_ri_(MVT VT, ISD::NodeType Opcode,
 /// to emit an instruction with a floating-point immediate operand using
 /// FastEmit_rf. If that fails, it materializes the immediate into a register
 /// and try FastEmit_rr instead.
-unsigned FastISel::FastEmit_rf_(MVT VT, ISD::NodeType Opcode,
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
                                 unsigned Op0, ConstantFP *FPImm,
                                 MVT ImmType) {
   // First check if immediate type is legal. If not, we can't use the rf form.
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index e3b25c2..4868c9e 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -113,7 +113,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
     return;
   }
   // Interpret void as zero return values.
-  if (Ty == Type::getVoidTy(Ty->getContext()))
+  if (Ty->isVoidTy())
     return;
   // Base case: we can get an EVT for this LLVM IR type.
   ValueVTs.push_back(TLI.getValueType(Ty));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 474d833..5e3f58a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -32,6 +32,7 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -950,9 +951,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   switch (Node->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "NODE: ";
-    Node->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "NODE: ";
+    Node->dump( &DAG);
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to legalize this operator!");
 
@@ -2292,12 +2293,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
     EVT VT = Node->getValueType(0);
     EVT ShiftAmountTy = TLI.getShiftAmountTy();
-    if (VT.isVector()) {
+    if (VT.isVector())
       ShiftAmountTy = VT;
-      VT = VT.getVectorElementType();
-    }
-    unsigned BitsDiff = VT.getSizeInBits() -
-                        ExtraVT.getSizeInBits();
+    unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+                        ExtraVT.getScalarType().getSizeInBits();
     SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
     Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
                        Node->getOperand(0), ShiftCst);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2831617..4f0fce7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -43,15 +43,15 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
-        errs() << "\n");
+  DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
   SDValue R = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SoftenFloatResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "SoftenFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to soften the result of this operator!");
 
@@ -531,15 +531,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
 //===----------------------------------------------------------------------===//
 
 bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
-        errs() << "\n");
+  DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG);
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SoftenFloatOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to soften this operator's operand!");
 
@@ -768,7 +768,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
 /// have invalid operands or may have other results that need promotion, we just
 /// know that (at least) one result needs expansion.
 void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Expand float result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Lo, Hi;
   Lo = Hi = SDValue();
 
@@ -779,8 +779,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandFloatResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand the result of this operator!");
 
@@ -1167,7 +1167,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
 /// types of the node are known to be legal, but other operands of the node may
 /// need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Expand float operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
@@ -1178,8 +1178,8 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
     switch (N->getOpcode()) {
     default:
   #ifndef NDEBUG
-      errs() << "ExpandFloatOperand Op #" << OpNo << ": ";
-      N->dump(&DAG); errs() << "\n";
+      dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
   #endif
       llvm_unreachable("Do not know how to expand this operator's operand!");
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index bd3b97a..9932cf4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
 /// may also have invalid operands or may have other results that need
 /// expansion, we just know that (at least) one result needs promotion.
 void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Promote integer result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   // See if the target wants to custom expand this node.
@@ -43,8 +43,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "PromoteIntegerResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to promote this operator!");
   case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
@@ -599,7 +599,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
 /// result types of the node are known to be legal, but other operands of the
 /// node may need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Promote integer operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -608,8 +608,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
   switch (N->getOpcode()) {
     default:
   #ifndef NDEBUG
-    errs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
   #endif
     llvm_unreachable("Do not know how to promote this operator's operand!");
 
@@ -910,7 +910,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
 /// have invalid operands or may have other results that need promotion, we just
 /// know that (at least) one result needs expansion.
 void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Expand integer result: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Lo, Hi;
   Lo = Hi = SDValue();
 
@@ -921,8 +921,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ExpandIntegerResult #" << ResNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to expand the result of this operator!");
 
@@ -1965,7 +1965,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
 /// result types of the node are known to be legal, but other operands of the
 /// node may need promotion or expansion as well as the specified one.
 bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Expand integer operand: "; N->dump(&DAG); errs() << "\n");
+  DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
@@ -1974,8 +1974,8 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   switch (N->getOpcode()) {
   default:
   #ifndef NDEBUG
-    errs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
-    N->dump(&DAG); errs() << "\n";
+    dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
   #endif
     llvm_unreachable("Do not know how to expand this operator's operand!");
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index d9efd4f..37f36a3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -123,42 +123,42 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
         // another node that has not been seen by the LegalizeTypes machinery.
         if ((I->getNodeId() == NewNode && Mapped > 1) ||
             (I->getNodeId() != NewNode && Mapped != 0)) {
-          errs() << "Unprocessed value in a map!";
+          dbgs() << "Unprocessed value in a map!";
           Failed = true;
         }
       } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
         if (Mapped > 1) {
-          errs() << "Value with legal type was transformed!";
+          dbgs() << "Value with legal type was transformed!";
           Failed = true;
         }
       } else {
         if (Mapped == 0) {
-          errs() << "Processed value not in any map!";
+          dbgs() << "Processed value not in any map!";
           Failed = true;
         } else if (Mapped & (Mapped - 1)) {
-          errs() << "Value in multiple maps!";
+          dbgs() << "Value in multiple maps!";
           Failed = true;
         }
       }
 
       if (Failed) {
         if (Mapped & 1)
-          errs() << " ReplacedValues";
+          dbgs() << " ReplacedValues";
         if (Mapped & 2)
-          errs() << " PromotedIntegers";
+          dbgs() << " PromotedIntegers";
         if (Mapped & 4)
-          errs() << " SoftenedFloats";
+          dbgs() << " SoftenedFloats";
         if (Mapped & 8)
-          errs() << " ScalarizedVectors";
+          dbgs() << " ScalarizedVectors";
         if (Mapped & 16)
-          errs() << " ExpandedIntegers";
+          dbgs() << " ExpandedIntegers";
         if (Mapped & 32)
-          errs() << " ExpandedFloats";
+          dbgs() << " ExpandedFloats";
         if (Mapped & 64)
-          errs() << " SplitVectors";
+          dbgs() << " SplitVectors";
         if (Mapped & 128)
-          errs() << " WidenedVectors";
-        errs() << "\n";
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
         llvm_unreachable(0);
       }
     }
@@ -342,7 +342,7 @@ ScanOperands:
     }
 
     if (i == NumOperands) {
-      DEBUG(errs() << "Legally typed node: "; N->dump(&DAG); errs() << "\n");
+      DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n");
     }
     }
 NodeDone:
@@ -411,7 +411,7 @@ NodeDone:
     if (!IgnoreNodeResults(I))
       for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i)
         if (!isTypeLegal(I->getValueType(i))) {
-          errs() << "Result type " << i << " illegal!\n";
+          dbgs() << "Result type " << i << " illegal!\n";
           Failed = true;
         }
 
@@ -419,24 +419,24 @@ NodeDone:
     for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
       if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
           !isTypeLegal(I->getOperand(i).getValueType())) {
-        errs() << "Operand type " << i << " illegal!\n";
+        dbgs() << "Operand type " << i << " illegal!\n";
         Failed = true;
       }
 
     if (I->getNodeId() != Processed) {
        if (I->getNodeId() == NewNode)
-         errs() << "New node not analyzed?\n";
+         dbgs() << "New node not analyzed?\n";
        else if (I->getNodeId() == Unanalyzed)
-         errs() << "Unanalyzed node not noticed?\n";
+         dbgs() << "Unanalyzed node not noticed?\n";
        else if (I->getNodeId() > 0)
-         errs() << "Operand not processed?\n";
+         dbgs() << "Operand not processed?\n";
        else if (I->getNodeId() == ReadyToProcess)
-         errs() << "Not added to worklist?\n";
+         dbgs() << "Not added to worklist?\n";
        Failed = true;
     }
 
     if (Failed) {
-      I->dump(&DAG); errs() << "\n";
+      I->dump(&DAG); dbgs() << "\n";
       llvm_unreachable(0);
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c35f7ad..b5dbd41 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -509,6 +509,7 @@ private:
   void ScalarizeVectorResult(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecRes_BinOp(SDNode *N);
   SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
+  SDValue ScalarizeVecRes_InregOp(SDNode *N);
 
   SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
   SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
@@ -550,6 +551,7 @@ private:
   void SplitVectorResult(SDNode *N, unsigned OpNo);
   void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
 
   void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -615,6 +617,7 @@ private:
   SDValue WidenVecRes_Convert(SDNode *N);
   SDValue WidenVecRes_Shift(SDNode *N);
   SDValue WidenVecRes_Unary(SDNode *N);
+  SDValue WidenVecRes_InregOp(SDNode *N);
 
   // Widen Vector Operand.
   bool WidenVectorOperand(SDNode *N, unsigned ResNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2625245..b5f84c0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -179,9 +179,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
-  case ISD::SIGN_EXTEND_INREG:
     QueryType = Node->getValueType(0);
     break;
+  case ISD::SIGN_EXTEND_INREG:
+  case ISD::FP_ROUND_INREG:
+    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    break;
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     QueryType = Node->getOperand(0).getValueType();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index cf67ab9..808bac7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -32,17 +32,17 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Scalarize node result " << ResNo << ": ";
+  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue R = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "ScalarizeVectorResult #" << ResNo << ": ";
+    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to scalarize the result of this operator!");
 
@@ -50,11 +50,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
   case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
   case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
-  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
   case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
   case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
@@ -186,6 +187,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
   return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+                     LHS, DAG.getValueType(ExtVT));
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   // If the operand is wider than the vector element type then it is implicitly
   // truncated.  Make that explicit here.
@@ -196,13 +205,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   return InOp;
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) {
-  EVT EltVT = N->getValueType(0).getVectorElementType();
-  SDValue LHS = GetScalarizedVector(N->getOperand(0));
-  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT,
-                     LHS, N->getOperand(1));
-}
-
 SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
   return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -278,18 +280,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
 //===----------------------------------------------------------------------===//
 
 bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Scalarize node operand " << OpNo << ": ";
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
 #ifndef NDEBUG
-      errs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
       N->dump(&DAG);
-      errs() << "\n";
+      dbgs() << "\n";
 #endif
       llvm_unreachable("Do not know how to scalarize this operator's operand!");
     case ISD::BIT_CONVERT:
@@ -382,17 +384,17 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
 /// legalization, we just know that (at least) one result needs vector
 /// splitting.
 void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Split node result: ";
+  DEBUG(dbgs() << "Split node result: ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Lo, Hi;
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "SplitVectorResult #" << ResNo << ": ";
+    dbgs() << "SplitVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to split the result of this operator!");
 
@@ -406,10 +408,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
   case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
   case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
-  case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
@@ -654,6 +657,21 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
   Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
 }
 
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+                   DAG.getValueType(LoVT));
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+                   DAG.getValueType(HiVT));
+}
+
 void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                      SDValue &Hi) {
   SDValue Vec = N->getOperand(0);
@@ -709,18 +727,6 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
   Hi = DAG.getUNDEF(HiVT);
 }
 
-void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo,
-                                                     SDValue &Hi) {
-  SDValue LHSLo, LHSHi;
-  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
-  DebugLoc dl = N->getDebugLoc();
-
-  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
-                   N->getOperand(1));
-  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
-                   N->getOperand(1));
-}
-
 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                                         SDValue &Hi) {
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -945,18 +951,18 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
 /// result types of the node are known to be legal, but other operands of the
 /// node may need legalization as well as the specified one.
 bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(errs() << "Split node operand: ";
+  DEBUG(dbgs() << "Split node operand: ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   if (Res.getNode() == 0) {
     switch (N->getOpcode()) {
     default:
 #ifndef NDEBUG
-      errs() << "SplitVectorOperand Op #" << OpNo << ": ";
+      dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
       N->dump(&DAG);
-      errs() << "\n";
+      dbgs() << "\n";
 #endif
       llvm_unreachable("Do not know how to split this operator's operand!");
 
@@ -1136,9 +1142,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
 //===----------------------------------------------------------------------===//
 
 void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Widen node result " << ResNo << ": ";
+  DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
 
   // See if the target wants to custom widen this node.
   if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
@@ -1148,9 +1154,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "WidenVectorResult #" << ResNo << ": ";
+    dbgs() << "WidenVectorResult #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to widen the result of this operator!");
 
@@ -1159,10 +1165,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
   case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    Res = WidenVecRes_InregOp(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
-  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
   case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
   case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
@@ -1331,6 +1338,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
   return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               cast<VTSDNode>(N->getOperand(1))->getVT()
+                                 .getVectorElementType(),
+                               WidenVT.getVectorNumElements());
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
   SDValue InOp = N->getOperand(0);
   EVT InVT = InOp.getValueType();
@@ -1713,13 +1731,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
                      WidenVT, N->getOperand(0));
 }
 
-SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) {
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
-                     WidenVT, WidenLHS, N->getOperand(1));
-}
-
 SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1806,17 +1817,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
 // Widen Vector Operand
 //===----------------------------------------------------------------------===//
 bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
-  DEBUG(errs() << "Widen node operand " << ResNo << ": ";
+  DEBUG(dbgs() << "Widen node operand " << ResNo << ": ";
         N->dump(&DAG);
-        errs() << "\n");
+        dbgs() << "\n");
   SDValue Res = SDValue();
 
   switch (N->getOpcode()) {
   default:
 #ifndef NDEBUG
-    errs() << "WidenVectorOperand op #" << ResNo << ": ";
+    dbgs() << "WidenVectorOperand op #" << ResNo << ": ";
     N->dump(&DAG);
-    errs() << "\n";
+    dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to widen this operator's operand!");
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 4045a34..0c3c974c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -109,7 +109,7 @@ private:
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGFast::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
@@ -136,9 +136,9 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
 
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     PredSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -175,7 +175,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -233,7 +233,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return NULL;
 
-    DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
 
     N = NewNodes[1];
@@ -343,7 +343,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
     SU = NewSU;
   }
 
-  DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
   NewSU = Clone(SU);
 
   // New SUnit has the exact same predecessors.
@@ -550,7 +550,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
           // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
           InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-          DEBUG(errs() << "Adding an edge from SU # " << TrySU->NodeNum
+          DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                               /*Reg=*/0, /*isNormalMemory=*/false,
@@ -558,7 +558,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
           NewDef = Copies.back();
         }
 
-        DEBUG(errs() << "Adding an edge from SU # " << NewDef->NodeNum
+        DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
         LiveRegDefs[Reg] = NewDef;
         AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index faf21f7..b92a672 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -87,7 +87,7 @@ private:
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGList::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
   
   // Build the scheduling graph.
   BuildSchedGraph(NULL);
@@ -110,9 +110,9 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -141,7 +141,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
@@ -233,7 +233,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
     } else if (!HasNoopHazards) {
       // Otherwise, we have a pipeline stall, but no other problem, just advance
       // the current cycle and try again.
-      DEBUG(errs() << "*** Advancing cycle, no work to do\n");
+      DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
       HazardRec->AdvanceCycle();
       ++NumStalls;
       ++CurCycle;
@@ -241,7 +241,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
       // Otherwise, we have no instructions to issue and we have instructions
       // that will fault if we don't do this right.  This is the case for
       // processors without pipeline interlocks and other cases.
-      DEBUG(errs() << "*** Emitting noop\n");
+      DEBUG(dbgs() << "*** Emitting noop\n");
       HazardRec->EmitNoop();
       Sequence.push_back(0);   // NULL here means noop
       ++NumNoops;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 7e1015a..1ad7919 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -164,7 +164,7 @@ private:
 
 /// Schedule - Schedule the DAG using list scheduling.
 void ScheduleDAGRRList::Schedule() {
-  DEBUG(errs() << "********** List Scheduling **********\n");
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
 
   NumLiveRegs = 0;
   LiveRegDefs.resize(TRI->getNumRegs(), NULL);  
@@ -199,9 +199,9 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
 
 #ifndef NDEBUG
   if (PredSU->NumSuccsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     PredSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -238,7 +238,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
 /// count of its predecessors. If a predecessor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
@@ -284,7 +284,7 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
 /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and
 /// its predecessor states to reflect the change.
 void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
-  DEBUG(errs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
   DEBUG(SU->dump(this));
 
   AvailableQueue->UnscheduledNode(SU);
@@ -371,7 +371,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
       return NULL;
 
-    DEBUG(errs() << "Unfolding SU # " << SU->NodeNum << "\n");
+    DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
     assert(NewNodes.size() == 2 && "Expected a load folding node!");
 
     N = NewNodes[1];
@@ -490,7 +490,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
     SU = NewSU;
   }
 
-  DEBUG(errs() << "Duplicating SU # " << SU->NodeNum << "\n");
+  DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
   NewSU = CreateClone(SU);
 
   // New SUnit has the exact same predecessors.
@@ -771,7 +771,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
           // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
           InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
-          DEBUG(errs() << "Adding an edge from SU #" << TrySU->NodeNum
+          DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                               /*Reg=*/0, /*isNormalMemory=*/false,
@@ -780,7 +780,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
           NewDef = Copies.back();
         }
 
-        DEBUG(errs() << "Adding an edge from SU #" << NewDef->NodeNum
+        DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
         LiveRegDefs[Reg] = NewDef;
         AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -827,9 +827,9 @@ void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
 
 #ifndef NDEBUG
   if (SuccSU->NumPredsLeft == 0) {
-    errs() << "*** Scheduling failed! ***\n";
+    dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
-    errs() << " has been released too many times!\n";
+    dbgs() << " has been released too many times!\n";
     llvm_unreachable(0);
   }
 #endif
@@ -858,7 +858,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
 /// count of its successors. If a successor pending count is zero, add it to
 /// the Available queue.
 void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
-  DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
   DEBUG(SU->dump(this));
 
   assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
@@ -1038,6 +1038,10 @@ namespace {
         return 0;
       return SethiUllmanNumbers[SU->NodeNum];
     }
+
+    unsigned getNodeOrdering(const SUnit *SU) const {
+      return scheduleDAG->DAG->GetOrdering(SU->getNode());
+    }
     
     unsigned size() const { return Queue.size(); }
 
@@ -1120,6 +1124,14 @@ static unsigned calcMaxScratches(const SUnit *SU) {
 
 // Bottom up
 bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+  unsigned LOrder = SPQ->getNodeOrdering(left);
+  unsigned ROrder = SPQ->getNodeOrdering(right);
+
+  // Prefer an ordering where the lower the non-zero order number, the higher
+  // the preference.
+  if ((LOrder || ROrder) && LOrder != ROrder)
+    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
   if (LPriority != RPriority)
@@ -1329,7 +1341,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
 
     // Ok, the transformation is safe and the heuristics suggest it is
     // profitable. Update the graph.
-    DEBUG(errs() << "Prescheduling SU # " << SU->NodeNum
+    DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
                  << " next to PredSU # " << PredSU->NodeNum
                  << " to guide scheduling in the presence of multiple uses\n");
     for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
@@ -1419,7 +1431,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
              (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
              (!SU->isCommutable && SuccSU->isCommutable)) &&
             !scheduleDAG->IsReachable(SuccSU, SU)) {
-          DEBUG(errs() << "Adding a pseudo-two-addr edge from SU # "
+          DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
                        << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
           scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
                                         /*Reg=*/0, /*isNormalMemory=*/false,
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d53de34..aaaa2b3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -253,19 +253,19 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
   if (!SU->getNode()) {
-    errs() << "PHYS REG COPY\n";
+    dbgs() << "PHYS REG COPY\n";
     return;
   }
 
   SU->getNode()->dump(DAG);
-  errs() << "\n";
+  dbgs() << "\n";
   SmallVector<SDNode *, 4> FlaggedNodes;
   for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
     FlaggedNodes.push_back(N);
   while (!FlaggedNodes.empty()) {
-    errs() << "    ";
+    dbgs() << "    ";
     FlaggedNodes.back()->dump(DAG);
-    errs() << "\n";
+    dbgs() << "\n";
     FlaggedNodes.pop_back();
   }
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 77301b0..cb1a0d6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
@@ -644,7 +645,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
       !N->isMachineOpcode() && !doNotCSE(N)) {
     N->dump(this);
-    errs() << "\n";
+    dbgs() << "\n";
     llvm_unreachable("Node is not in map!");
   }
 #endif
@@ -1740,7 +1741,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     return;
   case ISD::SIGN_EXTEND_INREG: {
     EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
-    unsigned EBits = EVT.getSizeInBits();
+    unsigned EBits = EVT.getScalarType().getSizeInBits();
 
     // Sign extension.  Compute the demanded bits in the result that are not
     // present in the input.
@@ -1785,7 +1786,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
     if (ISD::isZEXTLoad(Op.getNode())) {
       LoadSDNode *LD = cast<LoadSDNode>(Op);
       EVT VT = LD->getMemoryVT();
-      unsigned MemBits = VT.getSizeInBits();
+      unsigned MemBits = VT.getScalarType().getSizeInBits();
       KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
     }
     return;
@@ -2024,7 +2025,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
 
   case ISD::SIGN_EXTEND_INREG:
     // Max of the input and what this extends.
-    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    Tmp =
+      cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
     Tmp = VTBits-Tmp+1;
 
     Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2168,10 +2170,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     switch (ExtType) {
     default: break;
     case ISD::SEXTLOAD:    // '17' bits known
-      Tmp = LD->getMemoryVT().getSizeInBits();
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
       return VTBits-Tmp+1;
     case ISD::ZEXTLOAD:    // '16' bits known
-      Tmp = LD->getMemoryVT().getSizeInBits();
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
       return VTBits-Tmp;
     }
   }
@@ -2655,12 +2657,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     // size of the value, the shift/rotate count is guaranteed to be zero.
     if (VT == MVT::i1)
       return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
     break;
   case ISD::FP_ROUND_INREG: {
     EVT EVT = cast<VTSDNode>(N2)->getVT();
     assert(VT == N1.getValueType() && "Not an inreg round!");
     assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
            "Cannot FP_ROUND_INREG integer types");
+    assert(EVT.isVector() == VT.isVector() &&
+           "FP_ROUND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!EVT.isVector() ||
+            EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in FP_ROUND_INREG");
     assert(EVT.bitsLE(VT) && "Not rounding down!");
     if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
     break;
@@ -2690,15 +2700,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     assert(VT == N1.getValueType() && "Not an inreg extend!");
     assert(VT.isInteger() && EVT.isInteger() &&
            "Cannot *_EXTEND_INREG FP types");
-    assert(!EVT.isVector() &&
-           "SIGN_EXTEND_INREG type should be the vector element type rather "
-           "than the vector type!");
-    assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
+    assert(EVT.isVector() == VT.isVector() &&
+           "SIGN_EXTEND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!EVT.isVector() ||
+            EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in SIGN_EXTEND_INREG");
+    assert(EVT.bitsLE(VT) && "Not extending!");
     if (EVT == VT) return N1;  // Not actually extending
 
     if (N1C) {
       APInt Val = N1C->getAPIntValue();
-      unsigned FromBits = EVT.getSizeInBits();
+      unsigned FromBits = EVT.getScalarType().getSizeInBits();
       Val <<= Val.getBitWidth()-FromBits;
       Val = Val.ashr(Val.getBitWidth()-FromBits);
       return getConstant(Val, VT);
@@ -4106,7 +4119,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
       if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
         // If the and is only masking out bits that cannot effect the shift,
         // eliminate the and.
-        unsigned NumBits = VT.getSizeInBits()*2;
+        unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
         if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
           return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
       }
@@ -5713,7 +5726,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() {
 
 void SDNode::dump() const { dump(0); }
 void SDNode::dump(const SelectionDAG *G) const {
-  print(errs(), G);
+  print(dbgs(), G);
 }
 
 void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
@@ -5885,12 +5898,12 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
     if (N->getOperand(i).getNode()->hasOneUse())
       DumpNodes(N->getOperand(i).getNode(), indent+2, G);
     else
-      errs() << "\n" << std::string(indent+2, ' ')
-             << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+      dbgs() << "\n" << std::string(indent+2, ' ')
+           << (void*)N->getOperand(i).getNode() << ": <multiple use>";
 
 
-  errs() << "\n";
-  errs().indent(indent);
+  dbgs() << "\n";
+  dbgs().indent(indent);
   N->dump(G);
 }
 
@@ -5943,6 +5956,13 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
       Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0],
                                 getShiftAmountOperand(Operands[1])));
       break;
+    case ISD::SIGN_EXTEND_INREG:
+    case ISD::FP_ROUND_INREG: {
+      EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
+      Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
+                                Operands[0],
+                                getValueType(ExtVT)));
+    }
     }
   }
 
@@ -6048,7 +6068,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
 }
 
 void SelectionDAG::dump() const {
-  errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+  dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
 
   for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
        I != E; ++I) {
@@ -6059,7 +6079,7 @@ void SelectionDAG::dump() const {
 
   if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
 
-  errs() << "\n\n";
+  dbgs() << "\n\n";
 }
 
 void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
@@ -6106,12 +6126,12 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
 
 void SDNode::dumpr() const {
   VisitedSDNodeSet once;
-  DumpNodesr(errs(), this, 0, 0, once);
+  DumpNodesr(dbgs(), this, 0, 0, once);
 }
 
 void SDNode::dumpr(const SelectionDAG *G) const {
   VisitedSDNodeSet once;
-  DumpNodesr(errs(), this, 0, G, once);
+  DumpNodesr(dbgs(), this, 0, G, once);
 }
 
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 74d624f..5e3a3b5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1195,6 +1195,18 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
     return false;
   }
 
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].CC == Cases[1].CC &&
+      isa<Constant>(Cases[0].CmpRHS) &&
+      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+  
   return true;
 }
 
@@ -1733,7 +1745,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
   if (Density < 0.4)
     return false;
 
-  DEBUG(errs() << "Lowering jump table\n"
+  DEBUG(dbgs() << "Lowering jump table\n"
                << "First entry: " << First << ". Last entry: " << Last << '\n'
                << "Range: " << Range
                << "Size: " << TSize << ". Density: " << Density << "\n\n");
@@ -1837,7 +1849,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
 
   APInt LSize = FrontCase.size();
   APInt RSize = TSize-LSize;
-  DEBUG(errs() << "Selecting best pivot: \n"
+  DEBUG(dbgs() << "Selecting best pivot: \n"
                << "First: " << First << ", Last: " << Last <<'\n'
                << "LSize: " << LSize << ", RSize: " << RSize << '\n');
   for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
@@ -1853,7 +1865,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
                            (Last - RBegin + 1ULL).roundToDouble();
     double Metric = Range.logBase2()*(LDensity+RDensity);
     // Should always split in some non-trivial place
-    DEBUG(errs() <<"=>Step\n"
+    DEBUG(dbgs() <<"=>Step\n"
                  << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
                  << "LDensity: " << LDensity
                  << ", RDensity: " << RDensity << '\n'
@@ -1861,7 +1873,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
     if (FMetric < Metric) {
       Pivot = J;
       FMetric = Metric;
-      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
+      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
     }
 
     LSize += J->size();
@@ -1965,7 +1977,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
       // Don't bother the code below, if there are too much unique destinations
       return false;
   }
-  DEBUG(errs() << "Total number of unique destinations: "
+  DEBUG(dbgs() << "Total number of unique destinations: "
         << Dests.size() << '\n'
         << "Total number of comparisons: " << numCmps << '\n');
 
@@ -1974,7 +1986,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
   const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
   APInt cmpRange = maxValue - minValue;
 
-  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
                << "Low bound: " << minValue << '\n'
                << "High bound: " << maxValue << '\n');
 
@@ -1984,7 +1996,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
        !(Dests.size() >= 3 && numCmps >= 6)))
     return false;
 
-  DEBUG(errs() << "Emitting bit tests\n");
+  DEBUG(dbgs() << "Emitting bit tests\n");
   APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
 
   // Optimize the case where all the case values fit in a
@@ -2034,9 +2046,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
 
   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
 
-  DEBUG(errs() << "Cases:\n");
+  DEBUG(dbgs() << "Cases:\n");
   for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
-    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
+    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
                  << ", Bits: " << CasesBits[i].Bits
                  << ", BB: " << CasesBits[i].BB << '\n');
 
@@ -2135,7 +2147,7 @@ void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
   // create a binary search tree from them.
   CaseVector Cases;
   size_t numCmps = Clusterify(Cases, SI);
-  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
                << ". Total compares: " << numCmps << '\n');
   numCmps = 0;
 
@@ -3157,7 +3169,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
                          VTs, &Ops[0], Ops.size());
-  } else if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+  } else if (!I.getType()->isVoidTy()) {
     Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
                          VTs, &Ops[0], Ops.size());
   } else {
@@ -3176,7 +3188,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
       DAG.setRoot(Chain);
   }
 
-  if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
+  if (!I.getType()->isVoidTy()) {
     if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
       EVT VT = TLI.getValueType(PTy);
       Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
@@ -4406,12 +4418,6 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
       DAG.AssignOrdering(Res.getNode(), SDNodeOrder);
     return 0;
   }
-  case Intrinsic::dbg_stoppoint:
-  case Intrinsic::dbg_region_start:
-  case Intrinsic::dbg_region_end:
-  case Intrinsic::dbg_func_start:
-    // FIXME - Remove this instructions once the dust settles.
-    return 0;
   case Intrinsic::dbg_declare: {
     if (OptLevel != CodeGenOpt::None)
       // FIXME: Variable debug info is not supported here.
@@ -5931,7 +5937,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
 
       // The return value of the call is this value.  As such, there is no
       // corresponding argument.
-      assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
+      assert(!CS.getType()->isVoidTy() &&
              "Bad inline asm!");
       if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
         OpVT = TLI.getValueType(STy->getElementType(ResNo));
@@ -6056,7 +6062,8 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
   std::vector<SDValue> AsmNodeOperands;
   AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
   AsmNodeOperands.push_back(
-          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
 
 
   // Loop over all of the inputs, copying the operand values into the
@@ -6100,8 +6107,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
                                                       OpInfo.CallOperandVal));
       } else {
         // This is the result value of the call.
-        assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
-               "Bad inline asm!");
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
         // Concatenate this output onto the outputs list.
         RetValRegs.append(OpInfo.AssignedRegs);
       }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 88a2017..db656e3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -163,7 +163,7 @@ private:
   /// The comparison function for sorting the switch case values in the vector.
   /// WARNING: Case ranges should be disjoint!
   struct CaseCmp {
-    bool operator () (const Case& C1, const Case& C2) {
+    bool operator()(const Case &C1, const Case &C2) {
       assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
       const ConstantInt* CI1 = cast<const ConstantInt>(C1.Low);
       const ConstantInt* CI2 = cast<const ConstantInt>(C2.High);
@@ -172,12 +172,12 @@ private:
   };
 
   struct CaseBitsCmp {
-    bool operator () (const CaseBits& C1, const CaseBits& C2) {
+    bool operator()(const CaseBits &C1, const CaseBits &C2) {
       return C1.Bits > C2.Bits;
     }
   };
 
-  size_t Clusterify(CaseVector& Cases, const SwitchInst &SI);
+  size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
 
   /// CaseBlock - This structure is used to communicate between
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
@@ -215,7 +215,7 @@ private:
     MachineBasicBlock *Default;
   };
   struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, Value* SV, MachineBasicBlock* H,
+    JumpTableHeader(APInt F, APInt L, Value *SV, MachineBasicBlock *H,
                     bool E = false):
       First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
     APInt First;
@@ -230,8 +230,8 @@ private:
     BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
       Mask(M), ThisBB(T), TargetBB(Tr) { }
     uint64_t Mask;
-    MachineBasicBlock* ThisBB;
-    MachineBasicBlock* TargetBB;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
   };
 
   typedef SmallVector<BitTestCase, 3> BitTestInfo;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 05669c0..9ac8f83 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -162,7 +162,7 @@ MachineBasicBlock *TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                          MachineBasicBlock *MBB,
                    DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
 #ifndef NDEBUG
-  errs() << "If a target marks an instruction with "
+  dbgs() << "If a target marks an instruction with "
           "'usesCustomInserter', it must implement "
           "TargetLowering::EmitInstrWithCustomInserter!";
 #endif
@@ -325,7 +325,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   else
     GFI = 0;
   RegInfo = &MF->getRegInfo();
-  DEBUG(errs() << "\n\n\n=== " << Fn.getName() << "\n");
+  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
   MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
@@ -438,6 +438,95 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
   SDB->clear();
 }
 
+namespace {
+/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
+/// nodes from the worklist.
+class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
+  SmallVector<SDNode*, 128> &Worklist;
+public:
+  SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl) : Worklist(wl) {}
+
+  virtual void NodeDeleted(SDNode *N, SDNode *E) {
+    Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                   Worklist.end());
+  }
+
+  virtual void NodeUpdated(SDNode *N) {
+    // Ignore updates.
+  }
+};
+}
+
+/// ShrinkDemandedOps - A late transformation pass that shrink expressions
+/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
+/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
+void SelectionDAGISel::ShrinkDemandedOps() {
+  SmallVector<SDNode*, 128> Worklist;
+
+  // Add all the dag nodes to the worklist.
+  Worklist.reserve(CurDAG->allnodes_size());
+  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+       E = CurDAG->allnodes_end(); I != E; ++I)
+    Worklist.push_back(I);
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
+  while (!Worklist.empty()) {
+    SDNode *N = Worklist.pop_back_val();
+
+    if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
+      CurDAG->DeleteNode(N);
+      continue;
+    }
+
+    // Run ShrinkDemandedOp on scalar binary operations.
+    if (N->getNumValues() == 1 &&
+        N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) {
+      unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero, KnownOne;
+      if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
+                                   KnownZero, KnownOne, TLO)) {
+        // Revisit the node.
+        Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N),
+                       Worklist.end());
+        Worklist.push_back(N);
+
+        // Replace the old value with the new one.
+        DEBUG(errs() << "\nReplacing "; 
+              TLO.Old.getNode()->dump(CurDAG);
+              errs() << "\nWith: ";
+              TLO.New.getNode()->dump(CurDAG);
+              errs() << '\n');
+
+        Worklist.push_back(TLO.New.getNode());
+
+        SDOPsWorkListRemover DeadNodes(Worklist);
+        CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+        if (TLO.Old.getNode()->use_empty()) {
+          for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
+               i != e; ++i) {
+            SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); 
+            if (OpNode->hasOneUse()) {
+              Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                         OpNode), Worklist.end());
+              Worklist.push_back(OpNode);
+            }
+          }
+
+          Worklist.erase(std::remove(Worklist.begin(), Worklist.end(),
+                                     TLO.Old.getNode()), Worklist.end());
+          CurDAG->DeleteNode(TLO.Old.getNode());
+        }
+      }
+    }
+  }
+}
+
 void SelectionDAGISel::ComputeLiveOutVRegInfo() {
   SmallPtrSet<SDNode*, 128> VisitedNodes;
   SmallVector<SDNode*, 128> Worklist;
@@ -448,9 +537,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
   APInt KnownZero;
   APInt KnownOne;
 
-  while (!Worklist.empty()) {
-    SDNode *N = Worklist.back();
-    Worklist.pop_back();
+  do {
+    SDNode *N = Worklist.pop_back_val();
 
     // If we've already seen this node, ignore it.
     if (!VisitedNodes.insert(N))
@@ -490,7 +578,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
       LOI.KnownOne = KnownOne;
       LOI.KnownZero = KnownZero;
     }
-  }
+  } while (!Worklist.empty());
 }
 
 void SelectionDAGISel::CodeGenAndEmitDAG() {
@@ -504,7 +592,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     BlockName = MF->getFunction()->getNameStr() + ":" +
                 BB->getBasicBlock()->getNameStr();
 
-  DEBUG(errs() << "Initial selection DAG:\n");
+  DEBUG(dbgs() << "Initial selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -517,7 +605,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(Unrestricted, *AA, OptLevel);
   }
 
-  DEBUG(errs() << "Optimized lowered selection DAG:\n");
+  DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   // Second step, hack on the DAG until it only uses operations and types that
@@ -533,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     Changed = CurDAG->LegalizeTypes();
   }
 
-  DEBUG(errs() << "Type-legalized selection DAG:\n");
+  DEBUG(dbgs() << "Type-legalized selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   if (Changed) {
@@ -548,7 +636,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
     }
 
-    DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
+    DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
     DEBUG(CurDAG->dump());
   }
 
@@ -578,7 +666,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
     }
 
-    DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
+    DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
     DEBUG(CurDAG->dump());
   }
 
@@ -591,7 +679,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Legalize(OptLevel);
   }
 
-  DEBUG(errs() << "Legalized selection DAG:\n");
+  DEBUG(dbgs() << "Legalized selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -604,13 +692,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
   }
 
-  DEBUG(errs() << "Optimized legalized selection DAG:\n");
+  DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
 
-  if (OptLevel != CodeGenOpt::None)
+  if (OptLevel != CodeGenOpt::None) {
+    ShrinkDemandedOps();
     ComputeLiveOutVRegInfo();
+  }
 
   // Third, instruction select all of the operations to machine code, adding the
   // code to the MachineBasicBlock.
@@ -621,7 +711,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     InstructionSelect();
   }
 
-  DEBUG(errs() << "Selected selection DAG:\n");
+  DEBUG(dbgs() << "Selected selection DAG:\n");
   DEBUG(CurDAG->dump());
 
   if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -654,7 +744,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
     delete Scheduler;
   }
 
-  DEBUG(errs() << "Selected machine code:\n");
+  DEBUG(dbgs() << "Selected machine code:\n");
   DEBUG(BB->dump());
 }
 
@@ -699,7 +789,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
              I != E; ++I, ++j)
           if (Fn.paramHasAttr(j, Attribute::ByVal)) {
             if (EnableFastISelVerbose || EnableFastISelAbort)
-              errs() << "FastISel skips entry block due to byval argument\n";
+              dbgs() << "FastISel skips entry block due to byval argument\n";
             SuppressFastISel = true;
             break;
           }
@@ -729,10 +819,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
       // information is provided by an intrinsic (eh.selector) that can be moved
       // to unexpected places by the optimizers: if the unwind edge is critical,
       // then breaking it can result in the intrinsics being in the successor of
-      // the landing pad, not the landing pad itself.  This results in exceptions
-      // not being caught because no typeids are associated with the invoke.
-      // This may not be the only way things can go wrong, but it is the only way
-      // we try to work around for the moment.
+      // the landing pad, not the landing pad itself.  This results
+      // in exceptions not being caught because no typeids are associated with
+      // the invoke.  This may not be the only way things can go wrong, but it
+      // is the only way we try to work around for the moment.
       BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
 
       if (Br && Br->isUnconditional()) { // Critical edge?
@@ -765,7 +855,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
           if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
             ResetDebugLoc(SDB, FastIS);
             if (EnableFastISelVerbose || EnableFastISelAbort) {
-              errs() << "FastISel miss: ";
+              dbgs() << "FastISel miss: ";
               BI->dump();
             }
             assert(!EnableFastISelAbort &&
@@ -775,7 +865,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
 
         SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
 
-        // First try normal tablegen-generated "fast" selection.
+        // Try to select the instruction with FastISel.
         if (FastIS->SelectInstruction(BI)) {
           ResetDebugLoc(SDB, FastIS);
           continue;
@@ -788,11 +878,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(BI)) {
           if (EnableFastISelVerbose || EnableFastISelAbort) {
-            errs() << "FastISel missed call: ";
+            dbgs() << "FastISel missed call: ";
             BI->dump();
           }
 
-          if (BI->getType() != Type::getVoidTy(*CurDAG->getContext())) {
+          if (!BI->getType()->isVoidTy()) {
             unsigned &R = FuncInfo->ValueMap[BI];
             if (!R)
               R = FuncInfo->CreateRegForValue(BI);
@@ -817,7 +907,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
         // For now, be a little lenient about non-branch terminators.
         if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
           if (EnableFastISelVerbose || EnableFastISelAbort) {
-            errs() << "FastISel miss: ";
+            dbgs() << "FastISel miss: ";
             BI->dump();
           }
           if (EnableFastISelAbort)
@@ -846,13 +936,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
 void
 SelectionDAGISel::FinishBasicBlock() {
 
-  DEBUG(errs() << "Target-post-processed machine code:\n");
+  DEBUG(dbgs() << "Target-post-processed machine code:\n");
   DEBUG(BB->dump());
 
-  DEBUG(errs() << "Total amount of phi nodes to update: "
+  DEBUG(dbgs() << "Total amount of phi nodes to update: "
                << SDB->PHINodesToUpdate.size() << "\n");
   DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i)
-          errs() << "Node " << i << " : ("
+          dbgs() << "Node " << i << " : ("
                  << SDB->PHINodesToUpdate[i].first
                  << ", " << SDB->PHINodesToUpdate[i].second << ")\n");
 
@@ -915,11 +1005,11 @@ SelectionDAGISel::FinishBasicBlock() {
       // This is "default" BB. We have two jumps to it. From "header" BB and
       // from last "case" BB.
       if (PHIBB == SDB->BitTestCases[i].Default) {
-        PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
-                                                  false));
+        PHI->addOperand(MachineOperand::
+                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
         PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
-        PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
-                                                  false));
+        PHI->addOperand(MachineOperand::
+                        CreateReg(SDB->PHINodesToUpdate[pi].second, false));
         PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
                                                   back().ThisBB));
       }
@@ -927,10 +1017,9 @@ SelectionDAGISel::FinishBasicBlock() {
       for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
            j != ej; ++j) {
         MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
-        if (cBB->succ_end() !=
-            std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) {
-          PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second,
-                                                    false));
+        if (cBB->isSuccessor(PHIBB)) {
+          PHI->addOperand(MachineOperand::
+                          CreateReg(SDB->PHINodesToUpdate[pi].second, false));
           PHI->addOperand(MachineOperand::CreateMBB(cBB));
         }
       }
@@ -977,7 +1066,7 @@ SelectionDAGISel::FinishBasicBlock() {
           (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
       }
       // JT BB. Just iterate over successors here
-      if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) {
+      if (BB->isSuccessor(PHIBB)) {
         PHI->addOperand
           (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false));
         PHI->addOperand(MachineOperand::CreateMBB(BB));
@@ -1023,17 +1112,23 @@ SelectionDAGISel::FinishBasicBlock() {
         SDB->EdgeMapping.find(BB);
       if (EI != SDB->EdgeMapping.end())
         ThisBB = EI->second;
-      for (MachineBasicBlock::iterator Phi = BB->begin();
-           Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){
-        // This value for this PHI node is recorded in PHINodesToUpdate, get it.
-        for (unsigned pn = 0; ; ++pn) {
-          assert(pn != SDB->PHINodesToUpdate.size() &&
-                 "Didn't find PHI entry!");
-          if (SDB->PHINodesToUpdate[pn].first == Phi) {
-            Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn].
-                                                      second, false));
-            Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
-            break;
+
+      // BB may have been removed from the CFG if a branch was constant folded.
+      if (ThisBB->isSuccessor(BB)) {
+        for (MachineBasicBlock::iterator Phi = BB->begin();
+             Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI;
+             ++Phi) {
+          // This value for this PHI node is recorded in PHINodesToUpdate.
+          for (unsigned pn = 0; ; ++pn) {
+            assert(pn != SDB->PHINodesToUpdate.size() &&
+                   "Didn't find PHI entry!");
+            if (SDB->PHINodesToUpdate[pn].first == Phi) {
+              Phi->addOperand(MachineOperand::
+                              CreateReg(SDB->PHINodesToUpdate[pn].second,
+                                        false));
+              Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+              break;
+            }
           }
         }
       }
@@ -1302,45 +1397,47 @@ bool SelectionDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
   return !isNonImmUse(Root, N, U);
 }
 
-SDNode *SelectionDAGISel::Select_INLINEASM(SDValue N) {
-  std::vector<SDValue> Ops(N.getNode()->op_begin(), N.getNode()->op_end());
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+  std::vector<SDValue> Ops(N->op_begin(), N->op_end());
   SelectInlineAsmMemoryOperands(Ops);
     
   std::vector<EVT> VTs;
   VTs.push_back(MVT::Other);
   VTs.push_back(MVT::Flag);
-  SDValue New = CurDAG->getNode(ISD::INLINEASM, N.getDebugLoc(),
+  SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
                                 VTs, &Ops[0], Ops.size());
   return New.getNode();
 }
 
-SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) {
-  return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::IMPLICIT_DEF,
-                              N.getValueType());
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+  return CurDAG->SelectNodeTo(N, TargetInstrInfo::IMPLICIT_DEF,
+                              N->getValueType(0));
 }
 
-SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) {
-  SDValue Chain = N.getOperand(0);
+SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
   unsigned C = cast<LabelSDNode>(N)->getLabelID();
   SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
-  return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::EH_LABEL,
+  return CurDAG->SelectNodeTo(N, TargetInstrInfo::EH_LABEL,
                               MVT::Other, Tmp, Chain);
 }
 
-void SelectionDAGISel::CannotYetSelect(SDValue N) {
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
   std::string msg;
   raw_string_ostream Msg(msg);
   Msg << "Cannot yet select: ";
-  N.getNode()->print(Msg, CurDAG);
+  N->print(Msg, CurDAG);
   llvm_report_error(Msg.str());
 }
 
-void SelectionDAGISel::CannotYetSelectIntrinsic(SDValue N) {
-  errs() << "Cannot yet select: ";
+void SelectionDAGISel::CannotYetSelectIntrinsic(SDNode *N) {
+  dbgs() << "Cannot yet select: ";
   unsigned iid =
-    cast<ConstantSDNode>(N.getOperand(N.getOperand(0).getValueType() == MVT::Other))->getZExtValue();
+    cast<ConstantSDNode>(N->getOperand(N->getOperand(0).getValueType() ==
+                                       MVT::Other))->getZExtValue();
   if (iid < Intrinsic::num_intrinsics)
-    llvm_report_error("Cannot yet select: intrinsic %" + Intrinsic::getName((Intrinsic::ID)iid));
+    llvm_report_error("Cannot yet select: intrinsic %" +
+                      Intrinsic::getName((Intrinsic::ID)iid));
   else if (const TargetIntrinsicInfo *tii = TM.getIntrinsicInfo())
     llvm_report_error(Twine("Cannot yet select: target intrinsic %") +
                       tii->getName(iid));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 83fa5a8..3786bd1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -225,7 +225,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet
   if (level >= 20) {
     if (!printed) {
       printed = true;
-      DEBUG(errs() << "setSubgraphColor hit max level\n");
+      DEBUG(dbgs() << "setSubgraphColor hit max level\n");
     }
     return true;
   }
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d9a5a13..81c51c4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -990,7 +990,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
       return true;
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if (TLO.ShrinkDemandedConstant(Op, NewMask))
       return true;
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if ((KnownZero2 & NewMask) == NewMask)
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // If all of the unknown bits are known to be zero on one side or the other
@@ -1272,19 +1272,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
 
     // Sign extension.  Compute the demanded bits in the result that are not 
     // present in the input.
-    APInt NewBits = APInt::getHighBitsSet(BitWidth,
-                                          BitWidth - EVT.getSizeInBits()) &
-                    NewMask;
+    APInt NewBits =
+      APInt::getHighBitsSet(BitWidth,
+                            BitWidth - EVT.getScalarType().getSizeInBits()) &
+      NewMask;
     
     // If none of the extended bits are demanded, eliminate the sextinreg.
     if (NewBits == 0)
       return TLO.CombineTo(Op, Op.getOperand(0));
 
-    APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits());
+    APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
     InSignBit.zext(BitWidth);
-    APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth,
-                                                   EVT.getSizeInBits()) &
-                              NewMask;
+    APInt InputDemandedBits =
+      APInt::getLowBitsSet(BitWidth,
+                           EVT.getScalarType().getSizeInBits()) &
+      NewMask;
     
     // Since the sign extended bits are demanded, we know that the sign
     // bit is demanded.
@@ -1313,7 +1315,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     break;
   }
   case ISD::ZERO_EXTEND: {
-    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+    unsigned OperandBitWidth =
+      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
     APInt InMask = NewMask;
     InMask.trunc(OperandBitWidth);
     
@@ -1336,7 +1339,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   }
   case ISD::SIGN_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getSizeInBits();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt InMask    = APInt::getLowBitsSet(BitWidth, InBits);
     APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
     APInt NewBits   = ~InMask & NewMask;
@@ -1376,7 +1379,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     break;
   }
   case ISD::ANY_EXTEND: {
-    unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits();
+    unsigned OperandBitWidth =
+      Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
     APInt InMask = NewMask;
     InMask.trunc(OperandBitWidth);
     if (SimplifyDemandedBits(Op.getOperand(0), InMask,
@@ -1480,7 +1484,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                              KnownOne2, TLO, Depth+1))
       return true;
     // See if the operation should be performed at a smaller bit width.
-    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
   }
   // FALL THROUGH
@@ -1597,7 +1601,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
         N0.getOperand(0).getOpcode() == ISD::CTLZ &&
         N0.getOperand(1).getOpcode() == ISD::Constant) {
-      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+      const APInt &ShAmt
+        = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
       if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
           ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
         if ((C1 == 0) == (Cond == ISD::SETEQ)) {
@@ -1625,27 +1630,26 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         N0.getOperand(0).getNode()->hasOneUse() &&
         isa<ConstantSDNode>(N0.getOperand(1))) {
       LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
-      uint64_t bestMask = 0;
+      APInt bestMask;
       unsigned bestWidth = 0, bestOffset = 0;
-      if (!Lod->isVolatile() && Lod->isUnindexed() &&
-          // FIXME: This uses getZExtValue() below so it only works on i64 and
-          // below.
-          N0.getValueType().getSizeInBits() <= 64) {
+      if (!Lod->isVolatile() && Lod->isUnindexed()) {
         unsigned origWidth = N0.getValueType().getSizeInBits();
+        unsigned maskWidth = origWidth;
         // We can narrow (e.g.) 16-bit extending loads on 32-bit target to 
         // 8 bits, but have to be careful...
         if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
           origWidth = Lod->getMemoryVT().getSizeInBits();
-        uint64_t Mask =cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+        const APInt &Mask =
+          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
         for (unsigned width = origWidth / 2; width>=8; width /= 2) {
-          uint64_t newMask = (1ULL << width) - 1;
+          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
           for (unsigned offset=0; offset<origWidth/width; offset++) {
             if ((newMask & Mask) == Mask) {
               if (!TD->isLittleEndian())
                 bestOffset = (origWidth/width - offset - 1) * (width/8);
               else
                 bestOffset = (uint64_t)offset * (width/8);
-              bestMask = Mask >> (offset * (width/8) * 8);
+              bestMask = Mask.lshr(offset * (width/8) * 8);
               bestWidth = width;
               break;
             }
@@ -1668,7 +1672,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                         false, NewAlign);
           return DAG.getSetCC(dl, VT, 
                               DAG.getNode(ISD::AND, dl, newVT, NewLoad,
-                                          DAG.getConstant(bestMask, newVT)),
+                                      DAG.getConstant(bestMask.trunc(bestWidth),
+                                                      newVT)),
                               DAG.getConstant(0LL, newVT), Cond);
         }
       }
@@ -1760,7 +1765,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       
       // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
       if (N0.getOpcode() == ISD::SETCC) {
-        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getZExtValue() != 1);
+        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
         if (TrueWhenTrue)
           return N0;
         
@@ -1876,24 +1881,27 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
     // Fold bit comparisons when we can.
     if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
-        VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
+        (VT == N0.getValueType() ||
+         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+        N0.getOpcode() == ISD::AND)
       if (ConstantSDNode *AndRHS =
                   dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
         EVT ShiftTy = DCI.isBeforeLegalize() ?
           getPointerTy() : getShiftAmountTy();
         if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
           // Perform the xform if the AND RHS is a single bit.
-          if (isPowerOf2_64(AndRHS->getZExtValue())) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
-                                DAG.getConstant(Log2_64(AndRHS->getZExtValue()),
-                                                ShiftTy));
+          if (AndRHS->getAPIntValue().isPowerOf2()) {
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                   DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
           }
-        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) {
+        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
           // (X & 8) == 8  -->  (X & 8) >> 3
           // Perform the xform if C1 is a single bit.
           if (C1.isPowerOf2()) {
-            return DAG.getNode(ISD::SRL, dl, VT, N0,
-                                DAG.getConstant(C1.logBase2(), ShiftTy));
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                               DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                      DAG.getConstant(C1.logBase2(), ShiftTy)));
           }
         }
       }
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 8070570..aeaa38b 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -185,7 +185,7 @@ void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) {
   initShrinkWrappingInfo();
 
   DEBUG(if (ShrinkWrapThisFunction) {
-      errs() << "Place CSR spills/restores for "
+      dbgs() << "Place CSR spills/restores for "
              << MF->getFunction()->getName() << "\n";
     });
 
@@ -299,7 +299,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
 
   DEBUG({
       if (ShrinkWrapDebugging >= Details) {
-        errs()
+        dbgs()
           << "-----------------------------------------------------------\n"
           << " Antic/Avail Sets:\n"
           << "-----------------------------------------------------------\n"
@@ -314,7 +314,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) {
           dumpSets(MBB);
         }
 
-        errs()
+        dbgs()
           << "-----------------------------------------------------------\n";
       }
     });
@@ -363,7 +363,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   // If no CSRs used, we are done.
   if (CSI.empty()) {
     DEBUG(if (ShrinkWrapThisFunction)
-            errs() << "DISABLED: " << Fn.getFunction()->getName()
+            dbgs() << "DISABLED: " << Fn.getFunction()->getName()
                    << ": uses no callee-saved registers\n");
     return false;
   }
@@ -383,7 +383,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   // implementation to functions with <= 500 MBBs.
   if (Fn.size() > 500) {
     DEBUG(if (ShrinkWrapThisFunction)
-            errs() << "DISABLED: " << Fn.getFunction()->getName()
+            dbgs() << "DISABLED: " << Fn.getFunction()->getName()
                    << ": too large (" << Fn.size() << " MBBs)\n");
     ShrinkWrapThisFunction = false;
   }
@@ -465,7 +465,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
   }
 
   if (allCSRUsesInEntryBlock) {
-    DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+    DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
                  << ": all CSRs used in EntryBlock\n");
     ShrinkWrapThisFunction = false;
   } else {
@@ -477,7 +477,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
         allCSRsUsedInEntryFanout = false;
     }
     if (allCSRsUsedInEntryFanout) {
-      DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+      DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
                    << ": all CSRs used in imm successors of EntryBlock\n");
       ShrinkWrapThisFunction = false;
     }
@@ -504,7 +504,7 @@ bool PEI::calculateSets(MachineFunction &Fn) {
       if (dominatesExitNodes) {
         CSRUsedInChokePoints |= CSRUsed[MBB];
         if (CSRUsedInChokePoints == UsedCSRegs) {
-          DEBUG(errs() << "DISABLED: " << Fn.getFunction()->getName()
+          DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName()
                        << ": all CSRs used in choke point(s) at "
                        << getBasicBlockName(MBB) << "\n");
           ShrinkWrapThisFunction = false;
@@ -520,16 +520,16 @@ bool PEI::calculateSets(MachineFunction &Fn) {
     return false;
 
   DEBUG({
-      errs() << "ENABLED: " << Fn.getFunction()->getName();
+      dbgs() << "ENABLED: " << Fn.getFunction()->getName();
       if (HasFastExitPath)
-        errs() << " (fast exit path)";
-      errs() << "\n";
+        dbgs() << " (fast exit path)";
+      dbgs() << "\n";
       if (ShrinkWrapDebugging >= BasicInfo) {
-        errs() << "------------------------------"
+        dbgs() << "------------------------------"
              << "-----------------------------\n";
-        errs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
+        dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n";
         if (ShrinkWrapDebugging >= Details) {
-          errs() << "------------------------------"
+          dbgs() << "------------------------------"
                << "-----------------------------\n";
           dumpAllUsed();
         }
@@ -602,7 +602,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
       addedUses = true;
       blks.push_back(SUCC);
       DEBUG(if (ShrinkWrapDebugging >= Iterations)
-              errs() << getBasicBlockName(MBB)
+              dbgs() << getBasicBlockName(MBB)
                    << "(" << stringifyCSRegSet(prop) << ")->"
                    << "successor " << getBasicBlockName(SUCC) << "\n");
     }
@@ -618,7 +618,7 @@ bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB,
       addedUses = true;
       blks.push_back(PRED);
       DEBUG(if (ShrinkWrapDebugging >= Iterations)
-              errs() << getBasicBlockName(MBB)
+              dbgs() << getBasicBlockName(MBB)
                    << "(" << stringifyCSRegSet(prop) << ")->"
                    << "predecessor " << getBasicBlockName(PRED) << "\n");
     }
@@ -656,7 +656,7 @@ bool PEI::addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks) {
         CSRUsed[EXB] |= loopSpills;
         addedUses = true;
         DEBUG(if (ShrinkWrapDebugging >= Iterations)
-                errs() << "LOOP " << getBasicBlockName(MBB)
+                dbgs() << "LOOP " << getBasicBlockName(MBB)
                      << "(" << stringifyCSRegSet(loopSpills) << ")->"
                      << getBasicBlockName(EXB) << "\n");
         if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
@@ -723,7 +723,7 @@ bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
     blks.push_back(MBB);
 
   DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
-          errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
                << stringifyCSRegSet(CSRSave[MBB]) << "\n");
 
   return placedSpills;
@@ -784,7 +784,7 @@ bool PEI::calcRestorePlacements(MachineBasicBlock* MBB,
     blks.push_back(MBB);
 
   DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations)
-          errs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
+          dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = "
                << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
 
   return placedRestores;
@@ -808,7 +808,7 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
     ++iterations;
 
     DEBUG(if (ShrinkWrapDebugging >= Iterations)
-            errs() << "iter " << iterations
+            dbgs() << "iter " << iterations
                  << " --------------------------------------------------\n");
 
     // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG,
@@ -858,15 +858,15 @@ void PEI::placeSpillsAndRestores(MachineFunction &Fn) {
   unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
   numSRReduced += numSRReducedThisFunc;
   DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
-      errs() << "-----------------------------------------------------------\n";
-      errs() << "total iterations = " << iterations << " ( "
+      dbgs() << "-----------------------------------------------------------\n";
+      dbgs() << "total iterations = " << iterations << " ( "
            << Fn.getFunction()->getName()
            << " " << numSRReducedThisFunc
            << " " << Fn.size()
            << " )\n";
-      errs() << "-----------------------------------------------------------\n";
+      dbgs() << "-----------------------------------------------------------\n";
       dumpSRSets();
-      errs() << "-----------------------------------------------------------\n";
+      dbgs() << "-----------------------------------------------------------\n";
       if (numSRReducedThisFunc)
         verifySpillRestorePlacement();
     });
@@ -899,7 +899,7 @@ void PEI::findFastExitPath() {
     // Check the immediate successors.
     if (isReturnBlock(SUCC)) {
       if (ShrinkWrapDebugging >= BasicInfo)
-        errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
              << "->" << getBasicBlockName(SUCC) << "\n";
       break;
     }
@@ -917,7 +917,7 @@ void PEI::findFastExitPath() {
     }
     if (HasFastExitPath) {
       if (ShrinkWrapDebugging >= BasicInfo)
-        errs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
              << "->" << exitPath << "\n";
       break;
     }
@@ -951,7 +951,7 @@ void PEI::verifySpillRestorePlacement() {
     if (spilled.empty())
       continue;
 
-    DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(spilled)
                  << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
@@ -983,7 +983,7 @@ void PEI::verifySpillRestorePlacement() {
       if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
         if (restored != spilled) {
           CSRegSet notRestored = (spilled - restored);
-          DEBUG(errs() << MF->getFunction()->getName() << ": "
+          DEBUG(dbgs() << MF->getFunction()->getName() << ": "
                        << stringifyCSRegSet(notRestored)
                        << " spilled at " << getBasicBlockName(MBB)
                        << " are never restored on path to return "
@@ -1004,7 +1004,7 @@ void PEI::verifySpillRestorePlacement() {
     if (restored.empty())
       continue;
 
-    DEBUG(errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(CSRSave[MBB])
                  << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(restored) << "\n");
@@ -1031,7 +1031,7 @@ void PEI::verifySpillRestorePlacement() {
     }
     if (spilled != restored) {
       CSRegSet notSpilled = (restored - spilled);
-      DEBUG(errs() << MF->getFunction()->getName() << ": "
+      DEBUG(dbgs() << MF->getFunction()->getName() << ": "
                    << stringifyCSRegSet(notSpilled)
                    << " restored at " << getBasicBlockName(MBB)
                    << " are never spilled\n");
@@ -1078,13 +1078,13 @@ std::string PEI::stringifyCSRegSet(const CSRegSet& s) {
 }
 
 void PEI::dumpSet(const CSRegSet& s) {
-  DEBUG(errs() << stringifyCSRegSet(s) << "\n");
+  DEBUG(dbgs() << stringifyCSRegSet(s) << "\n");
 }
 
 void PEI::dumpUsed(MachineBasicBlock* MBB) {
   DEBUG({
       if (MBB)
-        errs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
+        dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = "
                << stringifyCSRegSet(CSRUsed[MBB])  << "\n";
     });
 }
@@ -1100,7 +1100,7 @@ void PEI::dumpAllUsed() {
 void PEI::dumpSets(MachineBasicBlock* MBB) {
   DEBUG({
       if (MBB)
-        errs() << getBasicBlockName(MBB)           << " | "
+        dbgs() << getBasicBlockName(MBB)           << " | "
                << stringifyCSRegSet(CSRUsed[MBB])  << " | "
                << stringifyCSRegSet(AnticIn[MBB])  << " | "
                << stringifyCSRegSet(AnticOut[MBB]) << " | "
@@ -1112,7 +1112,7 @@ void PEI::dumpSets(MachineBasicBlock* MBB) {
 void PEI::dumpSets1(MachineBasicBlock* MBB) {
   DEBUG({
       if (MBB)
-        errs() << getBasicBlockName(MBB)             << " | "
+        dbgs() << getBasicBlockName(MBB)             << " | "
                << stringifyCSRegSet(CSRUsed[MBB])    << " | "
                << stringifyCSRegSet(AnticIn[MBB])    << " | "
                << stringifyCSRegSet(AnticOut[MBB])   << " | "
@@ -1136,14 +1136,14 @@ void PEI::dumpSRSets() {
       for (MachineFunction::iterator MBB = MF->begin(), E = MF->end();
            MBB != E; ++MBB) {
         if (!CSRSave[MBB].empty()) {
-          errs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(CSRSave[MBB]);
           if (CSRRestore[MBB].empty())
-            errs() << '\n';
+            dbgs() << '\n';
         }
 
         if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty())
-          errs() << "    "
+          dbgs() << "    "
                  << "RESTORE[" << getBasicBlockName(MBB) << "] = "
                  << stringifyCSRegSet(CSRRestore[MBB]) << "\n";
       }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 6314331..27d429b 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -183,16 +183,16 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
     for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
       if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) {
         DEBUG({
-            errs() << "Interfere with sub-register ";
-            li_->getInterval(*SR).print(errs(), tri_);
+            dbgs() << "Interfere with sub-register ";
+            li_->getInterval(*SR).print(dbgs(), tri_);
           });
         return false;
       }
   }
 
   DEBUG({
-      errs() << "\nExtending: ";
-      IntB.print(errs(), tri_);
+      dbgs() << "\nExtending: ";
+      IntB.print(dbgs(), tri_);
     });
 
   SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
@@ -224,9 +224,9 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
     IntB.MergeValueNumberInto(BValNo, ValLR->valno);
   }
   DEBUG({
-      errs() << "   result = ";
-      IntB.print(errs(), tri_);
-      errs() << "\n";
+      dbgs() << "   result = ";
+      IntB.print(dbgs(), tri_);
+      dbgs() << "\n";
     });
 
   // If the source instruction was killing the source register before the
@@ -467,8 +467,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
   // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
   // simply extend BLR if CopyMI doesn't end the range.
   DEBUG({
-      errs() << "\nExtending: ";
-      IntB.print(errs(), tri_);
+      dbgs() << "\nExtending: ";
+      IntB.print(dbgs(), tri_);
     });
 
   // Remove val#'s defined by copies that will be coalesced away.
@@ -518,19 +518,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
   ValNo->setHasPHIKill(BHasPHIKill);
 
   DEBUG({
-      errs() << "   result = ";
-      IntB.print(errs(), tri_);
-      errs() << '\n';
-      errs() << "\nShortening: ";
-      IntA.print(errs(), tri_);
+      dbgs() << "   result = ";
+      IntB.print(dbgs(), tri_);
+      dbgs() << '\n';
+      dbgs() << "\nShortening: ";
+      IntA.print(dbgs(), tri_);
     });
 
   IntA.removeValNo(AValNo);
 
   DEBUG({
-      errs() << "   result = ";
-      IntA.print(errs(), tri_);
-      errs() << '\n';
+      dbgs() << "   result = ";
+      IntA.print(dbgs(), tri_);
+      dbgs() << '\n';
     });
 
   ++numCommutes;
@@ -1223,16 +1223,16 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
   if (li_->hasInterval(RealDstReg) &&
       RHS.overlaps(li_->getInterval(RealDstReg))) {
     DEBUG({
-        errs() << "Interfere with register ";
-        li_->getInterval(RealDstReg).print(errs(), tri_);
+        dbgs() << "Interfere with register ";
+        li_->getInterval(RealDstReg).print(dbgs(), tri_);
       });
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
     if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
-          errs() << "Interfere with sub-register ";
-          li_->getInterval(*SR).print(errs(), tri_);
+          dbgs() << "Interfere with sub-register ";
+          li_->getInterval(*SR).print(dbgs(), tri_);
         });
       return false; // Not coalescable
     }
@@ -1254,16 +1254,16 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
   if (li_->hasInterval(RealSrcReg) &&
       RHS.overlaps(li_->getInterval(RealSrcReg))) {
     DEBUG({
-        errs() << "Interfere with register ";
-        li_->getInterval(RealSrcReg).print(errs(), tri_);
+        dbgs() << "Interfere with register ";
+        li_->getInterval(RealSrcReg).print(dbgs(), tri_);
       });
     return false; // Not coalescable
   }
   for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
     if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
       DEBUG({
-          errs() << "Interfere with sub-register ";
-          li_->getInterval(*SR).print(errs(), tri_);
+          dbgs() << "Interfere with sub-register ";
+          li_->getInterval(*SR).print(dbgs(), tri_);
         });
       return false; // Not coalescable
     }
@@ -1293,7 +1293,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
   if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
     return false; // Already done.
 
-  DEBUG(errs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+  DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
 
   unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
   bool isExtSubReg = CopyMI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG;
@@ -1313,7 +1313,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
       // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been
       // coalesced to a larger register so the subreg indices cancel out.
-      DEBUG(errs() << "\tSource of insert_subreg or subreg_to_reg is already "
+      DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already "
                       "coalesced to another register.\n");
       return false;  // Not coalescable.
     }
@@ -1329,7 +1329,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
 
   // If they are already joined we continue.
   if (SrcReg == DstReg) {
-    DEBUG(errs() << "\tCopy already coalesced.\n");
+    DEBUG(dbgs() << "\tCopy already coalesced.\n");
     return false;  // Not coalescable.
   }
 
@@ -1338,17 +1338,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
 
   // If they are both physical registers, we cannot join them.
   if (SrcIsPhys && DstIsPhys) {
-    DEBUG(errs() << "\tCan not coalesce physregs.\n");
+    DEBUG(dbgs() << "\tCan not coalesce physregs.\n");
     return false;  // Not coalescable.
   }
 
   // We only join virtual registers with allocatable physical registers.
   if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
-    DEBUG(errs() << "\tSrc reg is unallocatable physreg.\n");
+    DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n");
     return false;  // Not coalescable.
   }
   if (DstIsPhys && !allocatableRegs_[DstReg]) {
-    DEBUG(errs() << "\tDst reg is unallocatable physreg.\n");
+    DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n");
     return false;  // Not coalescable.
   }
 
@@ -1362,7 +1362,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
     assert(DstSubRC && "Illegal subregister index");
     if (!DstSubRC->contains(SrcSubReg)) {
-      DEBUG(errs() << "\tIncompatible destination regclass: "
+      DEBUG(dbgs() << "\tIncompatible destination regclass: "
                    << tri_->getName(SrcSubReg) << " not in "
                    << DstSubRC->getName() << ".\n");
       return false;             // Not coalescable.
@@ -1379,7 +1379,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
       SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
     assert(SrcSubRC && "Illegal subregister index");
     if (!SrcSubRC->contains(DstSubReg)) {
-      DEBUG(errs() << "\tIncompatible source regclass: "
+      DEBUG(dbgs() << "\tIncompatible source regclass: "
                    << tri_->getName(DstSubReg) << " not in "
                    << SrcSubRC->getName() << ".\n");
       (void)DstSubReg;
@@ -1405,7 +1405,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
         // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
         // coalesced to a larger register so the subreg indices cancel out.
         if (DstSubIdx != SubIdx) {
-          DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+          DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
           return false; // Not coalescable.
         }
       } else
@@ -1418,7 +1418,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
         // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
         // coalesced to a larger register so the subreg indices cancel out.
         if (SrcSubIdx != SubIdx) {
-          DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+          DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
           return false; // Not coalescable.
         }
       } else
@@ -1427,7 +1427,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     } else if ((DstIsPhys && isExtSubReg) ||
                (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
       if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
-        DEBUG(errs() << "\tSrc of extract_subreg already coalesced with reg"
+        DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg"
                      << " of a super-class.\n");
         return false; // Not coalescable.
       }
@@ -1451,7 +1451,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
           // class as the would be resulting register.
           SubIdx = 0;
         else {
-          DEBUG(errs() << "\t Sub-register indices mismatch.\n");
+          DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
           return false; // Not coalescable.
         }
       }
@@ -1463,7 +1463,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
             NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
           }
         if (!NewRC) {
-          DEBUG(errs() << "\t Conflicting sub-register indices.\n");
+          DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
           return false;  // Not coalescable
         }
 
@@ -1535,7 +1535,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     } else if (!SrcIsPhys && !DstIsPhys) {
       NewRC = getCommonSubClass(SrcRC, DstRC);
       if (!NewRC) {
-        DEBUG(errs() << "\tDisjoint regclasses: "
+        DEBUG(dbgs() << "\tDisjoint regclasses: "
                      << SrcRC->getName() << ", "
                      << DstRC->getName() << ".\n");
         return false;           // Not coalescable.
@@ -1551,7 +1551,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
         (isExtSubReg || DstRC->isASubClass()) &&
         !isWinToJoinCrossClass(LargeReg, SmallReg,
                                allocatableRCRegs_[NewRC].count())) {
-      DEBUG(errs() << "\tSrc/Dest are different register classes.\n");
+      DEBUG(dbgs() << "\tSrc/Dest are different register classes.\n");
       // Allow the coalescer to try again in case either side gets coalesced to
       // a physical register that's compatible with the other side. e.g.
       // r1024 = MOV32to32_ r1025
@@ -1573,9 +1573,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
          "Register mapping is horribly broken!");
 
   DEBUG({
-      errs() << "\t\tInspecting "; SrcInt.print(errs(), tri_);
-      errs() << " and "; DstInt.print(errs(), tri_);
-      errs() << ": ";
+      dbgs() << "\t\tInspecting "; SrcInt.print(dbgs(), tri_);
+      dbgs() << " and "; DstInt.print(dbgs(), tri_);
+      dbgs() << ": ";
     });
 
   // Save a copy of the virtual register live interval. We'll manually
@@ -1606,7 +1606,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
           if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
             mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
             ++numAborts;
-            DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+            DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
             Again = true;  // May be possible to coalesce later.
             return false;
           }
@@ -1614,7 +1614,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
           if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
             mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
             ++numAborts;
-            DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+            DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
             Again = true;  // May be possible to coalesce later.
             return false;
           }
@@ -1635,7 +1635,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
                                    mri_->use_end()) / Length) < Ratio)) {
           mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
           ++numAborts;
-          DEBUG(errs() << "\tMay tie down a physical register, abort!\n");
+          DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
           Again = true;  // May be possible to coalesce later.
           return false;
         }
@@ -1654,7 +1654,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     // Only coalesce an empty interval (defined by implicit_def) with
     // another interval which has a valno defined by the CopyMI and the CopyMI
     // is a kill of the implicit def.
-    DEBUG(errs() << "Not profitable!\n");
+    DEBUG(dbgs() << "Not profitable!\n");
     return false;
   }
 
@@ -1676,7 +1676,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
     }
 
     // Otherwise, we are unable to join the intervals.
-    DEBUG(errs() << "Interference!\n");
+    DEBUG(dbgs() << "Interference!\n");
     Again = true;  // May be possible to coalesce later.
     return false;
   }
@@ -1779,9 +1779,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
   }
 
   DEBUG({
-      errs() << "\n\t\tJoined.  Result = ";
-      ResDstInt->print(errs(), tri_);
-      errs() << "\n";
+      dbgs() << "\n\t\tJoined.  Result = ";
+      ResDstInt->print(dbgs(), tri_);
+      dbgs() << "\n";
     });
 
   ++numJoins;
@@ -2134,8 +2134,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
       for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
         if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
           DEBUG({
-              errs() << "Interfere with sub-register ";
-              li_->getInterval(*SR).print(errs(), tri_);
+              dbgs() << "Interfere with sub-register ";
+              li_->getInterval(*SR).print(dbgs(), tri_);
             });
           return false;
         }
@@ -2151,8 +2151,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
       for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
         if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
           DEBUG({
-              errs() << "Interfere with sub-register ";
-              li_->getInterval(*SR).print(errs(), tri_);
+              dbgs() << "Interfere with sub-register ";
+              li_->getInterval(*SR).print(dbgs(), tri_);
             });
           return false;
         }
@@ -2413,7 +2413,7 @@ namespace {
 
 void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
                                                std::vector<CopyRec> &TryAgain) {
-  DEBUG(errs() << MBB->getName() << ":\n");
+  DEBUG(dbgs() << MBB->getName() << ":\n");
 
   std::vector<CopyRec> VirtCopies;
   std::vector<CopyRec> PhysCopies;
@@ -2478,7 +2478,7 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
 }
 
 void SimpleRegisterCoalescing::joinIntervals() {
-  DEBUG(errs() << "********** JOINING INTERVALS ***********\n");
+  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
 
   std::vector<CopyRec> TryAgainList;
   if (loopInfo->empty()) {
@@ -2610,12 +2610,11 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
   return NULL;
 }
 
-
 void SimpleRegisterCoalescing::printRegName(unsigned reg) const {
   if (TargetRegisterInfo::isPhysicalRegister(reg))
-    errs() << tri_->getName(reg);
+    dbgs() << tri_->getName(reg);
   else
-    errs() << "%reg" << reg;
+    dbgs() << "%reg" << reg;
 }
 
 void SimpleRegisterCoalescing::releaseMemory() {
@@ -2634,7 +2633,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
   AA = &getAnalysis<AliasAnalysis>();
   loopInfo = &getAnalysis<MachineLoopInfo>();
 
-  DEBUG(errs() << "********** SIMPLE REGISTER COALESCING **********\n"
+  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
                << "********** Function: "
                << ((Value*)mf_->getFunction())->getName() << '\n');
 
@@ -2648,11 +2647,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
   if (EnableJoining) {
     joinIntervals();
     DEBUG({
-        errs() << "********** INTERVALS POST JOINING **********\n";
+        dbgs() << "********** INTERVALS POST JOINING **********\n";
         for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
              I != E; ++I){
-          I->second->print(errs(), tri_);
-          errs() << "\n";
+          I->second->print(dbgs(), tri_);
+          dbgs() << "\n";
         }
       });
   }
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 6de03e1..9558933 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -381,9 +381,6 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
       I->eraseFromParent();
     }
 
-
-
-
     // The entry block changes to have the eh.sjlj.setjmp, with a conditional
     // branch to a dispatch block for non-zero returns. If we return normally,
     // we're not handling an exception and just register the function context
@@ -397,13 +394,15 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
     // Insert a load in the Catch block, and a switch on its value.  By default,
     // we go to a block that just does an unwind (which is the correct action
     // for a standard call).
-    BasicBlock *UnwindBlock = BasicBlock::Create(F.getContext(), "unwindbb", &F);
+    BasicBlock *UnwindBlock =
+      BasicBlock::Create(F.getContext(), "unwindbb", &F);
     Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
 
     Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
                                        DispatchBlock);
     SwitchInst *DispatchSwitch =
-      SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), DispatchBlock);
+      SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+                         DispatchBlock);
     // Split the entry block to insert the conditional branch for the setjmp.
     BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
                                                      "eh.sjlj.setjmp.cont");
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 782af12..b8f529b 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -192,18 +192,18 @@ void SlotIndexes::renumberIndexes() {
 void SlotIndexes::dump() const {
   for (const IndexListEntry *itr = front(); itr != getTail();
        itr = itr->getNext()) {
-    errs() << itr->getIndex() << " ";
+    dbgs() << itr->getIndex() << " ";
 
     if (itr->getInstr() != 0) {
-      errs() << *itr->getInstr();
+      dbgs() << *itr->getInstr();
     } else {
-      errs() << "\n";
+      dbgs() << "\n";
     }
   }
 
   for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
        itr != mbb2IdxMap.end(); ++itr) {
-    errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
+    dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
            << itr->second.first << ", " << itr->second.second << "]\n";
   }
 }
@@ -217,7 +217,7 @@ void SlotIndex::print(raw_ostream &os) const {
 
 // Dump a SlotIndex to stderr.
 void SlotIndex::dump() const {
-  print(errs());
-  errs() << "\n";
+  print(dbgs());
+  dbgs() << "\n";
 }
 
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index bec9294..7ba4403 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -67,7 +67,7 @@ protected:
   /// immediately before each use, and stores after each def. No folding or
   /// remat is attempted.
   std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
-    DEBUG(errs() << "Spilling everywhere " << *li << "\n");
+    DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
 
     assert(li->weight != HUGE_VALF &&
            "Attempting to spill already spilled value.");
@@ -75,7 +75,7 @@ protected:
     assert(!li->isStackSlot() &&
            "Trying to spill a stack slot.");
 
-    DEBUG(errs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+    DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
 
     std::vector<LiveInterval*> added;
     
@@ -89,7 +89,7 @@ protected:
       // Grab the use/def instr.
       MachineInstr *mi = &*regItr;
 
-      DEBUG(errs() << "  Processing " << *mi);
+      DEBUG(dbgs() << "  Processing " << *mi);
 
       // Step regItr to the next use/def instr.
       do {
@@ -242,7 +242,7 @@ private:
   std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
                                          SlotIndex *earliestStart) {
 
-    DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n");
+    DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
 
     std::vector<LiveInterval*> added;
     SmallVector<VNInfo*, 4> vnis;
@@ -257,11 +257,11 @@ private:
       if (vni->isUnused() || vni->kills.empty())
         continue;
 
-      DEBUG(errs() << "  Extracted Val #" << vni->id << " as ");
+      DEBUG(dbgs() << "  Extracted Val #" << vni->id << " as ");
       LiveInterval *splitInterval = extractVNI(li, vni);
       
       if (splitInterval != 0) {
-        DEBUG(errs() << *splitInterval << "\n");
+        DEBUG(dbgs() << *splitInterval << "\n");
         added.push_back(splitInterval);
         alreadySplit.insert(splitInterval);
         if (earliestStart != 0) {
@@ -269,11 +269,11 @@ private:
             *earliestStart = splitInterval->beginIndex();
         }
       } else {
-        DEBUG(errs() << "0\n");
+        DEBUG(dbgs() << "0\n");
       }
     } 
 
-    DEBUG(errs() << "Original LI: " << *li << "\n");
+    DEBUG(dbgs() << "Original LI: " << *li << "\n");
 
     // If there original interval still contains some live ranges
     // add it to added and alreadySplit.    
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index e8ee822..48bb5af 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -113,7 +113,7 @@ bool StackProtector::RequiresStackProtector() const {
 
         if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
           // We apparently only care about character arrays.
-          if (AT->getElementType() != Type::getInt8Ty(AT->getContext()))
+          if (!AT->getElementType()->isInteger(8))
             continue;
 
           // If an array has more than SSPBufferSize bytes of allocated space,
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index fd25a37..2170703 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -200,7 +200,7 @@ void StackSlotColoring::InitializeSlots() {
   Assignments.resize(LastFI);
 
   // Gather all spill slots into a list.
-  DEBUG(errs() << "Spill slot intervals:\n");
+  DEBUG(dbgs() << "Spill slot intervals:\n");
   for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
     LiveInterval &li = i->second;
     DEBUG(li.dump());
@@ -212,7 +212,7 @@ void StackSlotColoring::InitializeSlots() {
     OrigSizes[FI]      = MFI->getObjectSize(FI);
     AllColors.set(FI);
   }
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
 
   // Sort them by weight.
   std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
@@ -244,7 +244,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
     return false;
 
   bool Changed = false;
-  DEBUG(errs() << "Assigning unused registers to spill slots:\n");
+  DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
     int SS = li->getStackSlotIndex();
@@ -274,7 +274,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
         AllColored = false;
         continue;
       } else {
-        DEBUG(errs() << "Assigning fi#" << RSS << " to "
+        DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
                      << TRI->getName(Reg) << '\n');
         ColoredRegs.push_back(Reg);
         SlotMapping[RSS] = Reg;
@@ -302,7 +302,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
       ++NumEliminated;
     }
   }
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
 
   return Changed;
 }
@@ -337,7 +337,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
   // Record the assignment.
   Assignments[Color].push_back(li);
   int FI = li->getStackSlotIndex();
-  DEBUG(errs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+  DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
 
   // Change size and alignment of the allocated slot. If there are multiple
   // objects sharing the same slot, then make sure the size and alignment
@@ -361,7 +361,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
   BitVector SlotIsReg(NumObjs);
   BitVector UsedColors(NumObjs);
 
-  DEBUG(errs() << "Color spill slot intervals:\n");
+  DEBUG(dbgs() << "Color spill slot intervals:\n");
   bool Changed = false;
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
@@ -375,7 +375,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
     Changed |= (SS != NewSS);
   }
 
-  DEBUG(errs() << "\nSpill slots after coloring:\n");
+  DEBUG(dbgs() << "\nSpill slots after coloring:\n");
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
     LiveInterval *li = SSIntervals[i];
     int SS = li->getStackSlotIndex();
@@ -387,7 +387,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
 #ifndef NDEBUG
   for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
     DEBUG(SSIntervals[i]->dump());
-  DEBUG(errs() << '\n');
+  DEBUG(dbgs() << '\n');
 #endif
 
   // Can we "color" a stack slot with a unused register?
@@ -419,7 +419,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
 
   // Delete unused stack slots.
   while (NextColor != -1) {
-    DEBUG(errs() << "Removing unused stack object fi#" << NextColor << "\n");
+    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
     MFI->RemoveStackObject(NextColor);
     NextColor = AllColors.find_next(NextColor);
   }
@@ -605,7 +605,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
   MachineBasicBlock *MBB = MI->getParent();
   if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
     if (PropagateForward(MI, MBB, DstReg, Reg)) {
-      DEBUG(errs() << "Eliminated load: ");
+      DEBUG(dbgs() << "Eliminated load: ");
       DEBUG(MI->dump());
       ++NumLoadElim;
     } else {
@@ -621,7 +621,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
     }
   } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
     if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
-      DEBUG(errs() << "Eliminated store: ");
+      DEBUG(dbgs() << "Eliminated store: ");
       DEBUG(MI->dump());
       ++NumStoreElim;
     } else {
@@ -699,7 +699,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
 
 
 bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "********** Stack Slot Coloring **********\n");
+  DEBUG(dbgs() << "********** Stack Slot Coloring **********\n");
 
   MFI = MF.getFrameInfo();
   MRI = &MF.getRegInfo(); 
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 3c13906..bd7cb75 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -555,7 +555,7 @@ void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
     // Add the renaming set for this PHI node to our overall renaming information
     for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
          QE = PHIUnion.end(); QI != QE; ++QI) {
-      DEBUG(errs() << "Adding Renaming: " << QI->first << " -> "
+      DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
                    << P->getOperand(0).getReg() << "\n");
     }
     
@@ -698,7 +698,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
         TII->copyRegToReg(*PI->getParent(), PI, t,
                           curr.second, RC, RC);
         
-        DEBUG(errs() << "Inserted copy from " << curr.second << " to " << t
+        DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
                      << "\n");
         
         // Push temporary on Stacks
@@ -715,7 +715,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
       TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
                         map[curr.first], RC, RC);
       map[curr.first] = curr.second;
-      DEBUG(errs() << "Inserted copy from " << curr.first << " to "
+      DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
                    << curr.second << "\n");
       
       // Push this copy onto InsertedPHICopies so we can
@@ -928,7 +928,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
         unsigned reg = OI->first;
         ++OI;
         I->second.erase(reg);
-        DEBUG(errs() << "Removing Renaming: " << reg << " -> " << I->first
+        DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
                      << "\n");
       }
     }
@@ -946,7 +946,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
     while (I->second.size()) {
       std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
       
-      DEBUG(errs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
+      DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
       
       if (SI->first != I->first) {
         if (mergeLiveIntervals(I->first, SI->first)) {
@@ -978,7 +978,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
           R.valno->setCopy(--SI->second->getFirstTerminator());
           R.valno->def = instrIdx.getDefIndex();
           
-          DEBUG(errs() << "Renaming failed: " << SI->first << " -> "
+          DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
                        << I->first << "\n");
         }
       }
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index bf58902..f51f74d 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -139,8 +139,8 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
           }
         }
         if (!Found) {
-          errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
-          errs() << "  missing input from predecessor BB#"
+          dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+          dbgs() << "  missing input from predecessor BB#"
                  << PredBB->getNumber() << '\n';
           llvm_unreachable(0);
         }
@@ -150,14 +150,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
         MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
         if (CheckExtra && !Preds.count(PHIBB)) {
           // This is not a hard error.
-          errs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+          dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
                  << ": " << *MI;
-          errs() << "  extra input from predecessor BB#"
+          dbgs() << "  extra input from predecessor BB#"
                  << PHIBB->getNumber() << '\n';
         }
         if (PHIBB->getNumber() < 0) {
-          errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
-          errs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
+          dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+          dbgs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
           llvm_unreachable(0);
         }
       }
@@ -173,7 +173,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
   bool MadeChange = false;
 
   if (PreRegAlloc && TailDupVerify) {
-    DEBUG(errs() << "\n*** Before tail-duplicating\n");
+    DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
     VerifyPHIs(MF, true);
   }
 
@@ -253,7 +253,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
         SSAUpdateVals.clear();
       }
 
-      // Eliminate some of the copies inserted tail duplication to maintain
+      // Eliminate some of the copies inserted by tail duplication to maintain
       // SSA form.
       for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
         MachineInstr *Copy = Copies[i];
@@ -346,7 +346,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
                                      MachineBasicBlock *PredBB,
                                      MachineFunction &MF,
                                      DenseMap<unsigned, unsigned> &LocalVRMap) {
-  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+  MachineInstr *NewMI = TII->duplicate(MI, MF);
   for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = NewMI->getOperand(i);
     if (!MO.isReg())
@@ -437,8 +437,11 @@ bool
 TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
                                  SmallVector<MachineInstr*, 16> &Copies) {
-  // Don't try to tail-duplicate single-block loops.
-  if (TailBB->isSuccessor(TailBB))
+  // Pre-regalloc tail duplication hurts compile time and doesn't help
+  // much except for indirect branches.
+  bool hasIndirectBranch = (!TailBB->empty() &&
+                            TailBB->back().getDesc().isIndirectBranch());
+  if (PreRegAlloc && !hasIndirectBranch)
     return false;
 
   // Set the limit on the number of instructions to duplicate, with a default
@@ -446,7 +449,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
   unsigned MaxDuplicateCount;
-  if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch())
+  if (hasIndirectBranch)
     // If the target has hardware branch prediction that can handle indirect
     // branches, duplicating them can often make them predictable when there
     // are common paths through the code.  The limit needs to be high enough
@@ -457,6 +460,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   else
     MaxDuplicateCount = TailDuplicateSize;
 
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB->isSuccessor(TailBB))
+    return false;
+
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
   unsigned InstrCount = 0;
@@ -481,7 +488,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   if (InstrCount > 1 && HasCall)
     return false;
 
-  DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+  DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
 
   // Iterate through all the unique predecessors and tail-duplicate this
   // block into them, if possible. Copying the list ahead of time also
@@ -510,7 +517,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
     if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
       continue;
 
-    DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+    DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
                  << "From Succ: " << *TailBB);
 
     TDBBs.push_back(PredBB);
@@ -570,7 +577,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
   if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
       TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
       !TailBB->hasAddressTaken()) {
-    DEBUG(errs() << "\nMerging into block: " << *PrevBB
+    DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
           << "From MBB: " << *TailBB);
     if (PreRegAlloc) {
       DenseMap<unsigned, unsigned> LocalVRMap;
@@ -620,7 +627,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
 /// function, updating the CFG.
 void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
   assert(MBB->pred_empty() && "MBB must be dead!");
-  DEBUG(errs() << "\nRemoving MBB: " << *MBB);
+  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
 
   // Remove all successors.
   while (!MBB->succ_empty())
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 393e315..a0fccab 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -150,6 +150,13 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
   MBB.insert(I, MI);
 }
 
+MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
+                                             MachineFunction &MF) const {
+  assert(!Orig->getDesc().isNotDuplicable() &&
+         "Instruction cannot be duplicated");
+  return MF.CloneMachineInstr(Orig);
+}
+
 bool
 TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
                                  const MachineInstr *Other,
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 98b95ac..a3f6364 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -573,15 +573,15 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
                                MachineFunction::iterator &mbbi,
                                unsigned RegB, unsigned RegC, unsigned Dist) {
   MachineInstr *MI = mi;
-  DEBUG(errs() << "2addr: COMMUTING  : " << *MI);
+  DEBUG(dbgs() << "2addr: COMMUTING  : " << *MI);
   MachineInstr *NewMI = TII->commuteInstruction(MI);
 
   if (NewMI == 0) {
-    DEBUG(errs() << "2addr: COMMUTING FAILED!\n");
+    DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
     return false;
   }
 
-  DEBUG(errs() << "2addr: COMMUTED TO: " << *NewMI);
+  DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
   // If the instruction changed to commute it, update livevar.
   if (NewMI != MI) {
     if (LV)
@@ -628,8 +628,8 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
                                               unsigned RegB, unsigned Dist) {
   MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
   if (NewMI) {
-    DEBUG(errs() << "2addr: CONVERTING 2-ADDR: " << *mi);
-    DEBUG(errs() << "2addr:         TO 3-ADDR: " << *NewMI);
+    DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+    DEBUG(dbgs() << "2addr:         TO 3-ADDR: " << *NewMI);
     bool Sunk = false;
 
     if (NewMI->findRegisterUseOperand(RegB, false, TRI))
@@ -891,7 +891,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
 /// runOnMachineFunction - Reduce two-address instructions to two operands.
 ///
 bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
-  DEBUG(errs() << "Machine Function\n");
+  DEBUG(dbgs() << "Machine Function\n");
   const TargetMachine &TM = MF.getTarget();
   MRI = &MF.getRegInfo();
   TII = TM.getInstrInfo();
@@ -901,8 +901,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
 
   bool MadeChange = false;
 
-  DEBUG(errs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
-  DEBUG(errs() << "********** Function: " 
+  DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
+  DEBUG(dbgs() << "********** Function: " 
         << MF.getFunction()->getName() << '\n');
 
   // ReMatRegs - Keep track of the registers whose def's are remat'ed.
@@ -943,7 +943,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
         if (FirstTied) {
           FirstTied = false;
           ++NumTwoAddressInstrs;
-          DEBUG(errs() << '\t' << *mi);
+          DEBUG(dbgs() << '\t' << *mi);
         }
 
         assert(mi->getOperand(SrcIdx).isReg() &&
@@ -1024,7 +1024,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
               DefMI->getDesc().isAsCheapAsAMove() &&
               DefMI->isSafeToReMat(TII, regB, AA) &&
               isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
-            DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
+            DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
             unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
             TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
             ReMatRegs.set(regB);
@@ -1040,7 +1040,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
           DistanceMap.insert(std::make_pair(prevMI, Dist));
           DistanceMap[mi] = ++Dist;
 
-          DEBUG(errs() << "\t\tprepend:\t" << *prevMI);
+          DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI);
 
           MachineOperand &MO = mi->getOperand(SrcIdx);
           assert(MO.isReg() && MO.getReg() == regB && MO.isUse() &&
@@ -1085,7 +1085,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
           
         MadeChange = true;
 
-        DEBUG(errs() << "\t\trewrite to:\t" << *mi);
+        DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
       }
 
       // Clear TiedOperands here instead of at the top of the loop
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index c8c5d86..d4fb2e4 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -278,5 +278,5 @@ void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
 }
 
 void VirtRegMap::dump() const {
-  print(errs());
+  print(dbgs());
 }
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 054c3b6..df2b8d2 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -60,6 +60,33 @@ ScheduleSpills("schedule-spills",
 
 VirtRegRewriter::~VirtRegRewriter() {}
 
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+                              const TargetRegisterInfo &TRI) {
+  if (unsigned SubIdx = MO.getSubReg()) {
+    // Insert the physical subreg and reset the subreg field.
+    MO.setReg(TRI.getSubReg(Reg, SubIdx));
+    MO.setSubReg(0);
+
+    // Any def, dead, and kill flags apply to the full virtual register, so they
+    // also apply to the full physical register. Add imp-def/dead and imp-kill
+    // as needed.
+    MachineInstr &MI = *MO.getParent();
+    if (MO.isDef())
+      if (MO.isDead())
+        MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
+      else
+        MI.addRegisterDefined(Reg, &TRI);
+    else if (!MO.isUndef() &&
+             (MO.isKill() ||
+              MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+      MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+  } else {
+    MO.setReg(Reg);
+  }
+}
+
 namespace {
 
 /// This class is intended for use with the new spilling framework only. It
@@ -69,10 +96,10 @@ struct TrivialRewriter : public VirtRegRewriter {
 
   bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
                             LiveIntervals* LIs) {
-    DEBUG(errs() << "********** REWRITE MACHINE CODE **********\n");
-    DEBUG(errs() << "********** Function: " 
+    DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+    DEBUG(dbgs() << "********** Function: " 
           << MF.getFunction()->getName() << '\n');
-    DEBUG(errs() << "**** Machine Instrs"
+    DEBUG(dbgs() << "**** Machine Instrs"
           << "(NOTE! Does not include spills and reloads!) ****\n");
     DEBUG(MF.dump());
 
@@ -101,16 +128,13 @@ struct TrivialRewriter : public VirtRegRewriter {
           MachineOperand &mop = regItr.getOperand();
           assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?");
           ++regItr;
-          unsigned subRegIdx = mop.getSubReg();
-          unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg;
-          mop.setReg(pRegOp);
-          mop.setSubReg(0);
+          substitutePhysReg(mop, pReg, *tri);
           changed = true;
         }
       }
     }
     
-    DEBUG(errs() << "**** Post Machine Instrs ****\n");
+    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
     DEBUG(MF.dump());
     
     return changed;
@@ -191,11 +215,11 @@ public:
                                               (unsigned)CanClobber;
 
     if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
-      DEBUG(errs() << "Remembering RM#"
+      DEBUG(dbgs() << "Remembering RM#"
                    << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
     else
-      DEBUG(errs() << "Remembering SS#" << SlotOrReMat);
-    DEBUG(errs() << " in physreg " << TRI->getName(Reg) << "\n");
+      DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
   }
 
   /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -647,12 +671,9 @@ static void ReMaterialize(MachineBasicBlock &MBB,
     if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
       continue;
     assert(MO.isUse());
-    unsigned SubIdx = MO.getSubReg();
     unsigned Phys = VRM.getPhys(VirtReg);
     assert(Phys && "Virtual register is not assigned a register?");
-    unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
-    MO.setReg(RReg);
-    MO.setSubReg(0);
+    substitutePhysReg(MO, Phys, *TRI);
   }
   ++NumReMats;
 }
@@ -686,7 +707,7 @@ void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
     assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
            "Bidirectional map mismatch!");
     SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
-    DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
          << " copied, it is available for use but can no longer be modified\n");
   }
 }
@@ -711,12 +732,12 @@ void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
     assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
            "Bidirectional map mismatch!");
     SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
-    DEBUG(errs() << "PhysReg " << TRI->getName(PhysReg)
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
           << " clobbered, invalidating ");
     if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
-      DEBUG(errs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+      DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
     else
-      DEBUG(errs() << "SS#" << SlotOrReMat << "\n");
+      DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
   }
 }
 
@@ -895,9 +916,9 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
 
         Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
         UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-        DEBUG(errs() << '\t' << *prior(InsertLoc));
+        DEBUG(dbgs() << '\t' << *prior(InsertLoc));
         
-        DEBUG(errs() << "Reuse undone!\n");
+        DEBUG(dbgs() << "Reuse undone!\n");
         --NumReused;
         
         // Finally, PhysReg is now available, go ahead and use it.
@@ -1004,11 +1025,12 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
 }
 
 static
-void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg) {
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+                         const TargetRegisterInfo &TRI) {
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = MI->getOperand(i);
     if (MO.isReg() && MO.getReg() == VirtReg)
-      MO.setReg(PhysReg);
+      substitutePhysReg(MO, PhysReg, TRI);
   }
 }
 
@@ -1041,9 +1063,9 @@ public:
     TRI = MF.getTarget().getRegisterInfo();
     TII = MF.getTarget().getInstrInfo();
     AllocatableRegs = TRI->getAllocatableSet(MF);
-    DEBUG(errs() << "\n**** Local spiller rewriting function '"
+    DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
           << MF.getFunction()->getName() << "':\n");
-    DEBUG(errs() << "**** Machine Instrs (NOTE! Does not include spills and"
+    DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
                     " reloads!) ****\n");
     DEBUG(MF.dump());
 
@@ -1095,7 +1117,7 @@ public:
       Spills.clear();
     }
 
-    DEBUG(errs() << "**** Post Machine Instrs ****\n");
+    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
     DEBUG(MF.dump());
 
     // Mark unused spill slots.
@@ -1175,7 +1197,7 @@ private:
     if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
       llvm_unreachable("Unable unfold the load / store folding instruction!");
     assert(NewMIs.size() == 1);
-    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
     VRM.transferRestorePts(&MI, NewMIs[0]);
     MII = MBB.insert(MII, NewMIs[0]);
     InvalidateKills(MI, TRI, RegKills, KillOps);
@@ -1191,7 +1213,7 @@ private:
       if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
         llvm_unreachable("Unable unfold the load / store folding instruction!");
       assert(NewMIs.size() == 1);
-      AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg);
+      AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
       VRM.transferRestorePts(&NextMI, NewMIs[0]);
       MBB.insert(NextMII, NewMIs[0]);
       InvalidateKills(NextMI, TRI, RegKills, KillOps);
@@ -1467,11 +1489,11 @@ private:
     TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
     MachineInstr *StoreMI = prior(oldNextMII);
     VRM.addSpillSlotUse(StackSlot, StoreMI);
-    DEBUG(errs() << "Store:\t" << *StoreMI);
+    DEBUG(dbgs() << "Store:\t" << *StoreMI);
 
     // If there is a dead store to this stack slot, nuke it now.
     if (LastStore) {
-      DEBUG(errs() << "Removed dead store:\t" << *LastStore);
+      DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
       ++NumDSE;
       SmallVector<unsigned, 2> KillRegs;
       InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
@@ -1599,7 +1621,7 @@ private:
                   AvailableSpills &Spills, BitVector &RegKills,
                   std::vector<MachineOperand*> &KillOps) {
 
-    DEBUG(errs() << "\n**** Local spiller rewriting MBB '"
+    DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
           << MBB.getName() << "':\n");
 
     MachineFunction &MF = *MBB.getParent();
@@ -1699,11 +1721,11 @@ private:
             // If the value is already available in the expected register, save
             // a reload / remat.
             if (SSorRMId)
-              DEBUG(errs() << "Reusing RM#"
+              DEBUG(dbgs() << "Reusing RM#"
                            << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
             else
-              DEBUG(errs() << "Reusing SS#" << SSorRMId);
-            DEBUG(errs() << " from physreg "
+              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+            DEBUG(dbgs() << " from physreg "
                          << TRI->getName(InReg) << " for vreg"
                          << VirtReg <<" instead of reloading into physreg "
                          << TRI->getName(Phys) << '\n');
@@ -1711,11 +1733,11 @@ private:
             continue;
           } else if (InReg && InReg != Phys) {
             if (SSorRMId)
-              DEBUG(errs() << "Reusing RM#"
+              DEBUG(dbgs() << "Reusing RM#"
                            << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
             else
-              DEBUG(errs() << "Reusing SS#" << SSorRMId);
-            DEBUG(errs() << " from physreg "
+              DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+            DEBUG(dbgs() << " from physreg "
                          << TRI->getName(InReg) << " for vreg"
                          << VirtReg <<" by copying it into physreg "
                          << TRI->getName(Phys) << '\n');
@@ -1742,7 +1764,7 @@ private:
             KillOpnd->setIsKill();
             UpdateKills(*CopyMI, TRI, RegKills, KillOps);
 
-            DEBUG(errs() << '\t' << *CopyMI);
+            DEBUG(dbgs() << '\t' << *CopyMI);
             ++NumCopified;
             continue;
           }
@@ -1769,7 +1791,7 @@ private:
           Spills.addAvailable(SSorRMId, Phys);
 
           UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(errs() << '\t' << *prior(MII));
+          DEBUG(dbgs() << '\t' << *prior(MII));
         }
       }
 
@@ -1789,7 +1811,7 @@ private:
           TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
           MachineInstr *StoreMI = prior(oldNextMII);
           VRM.addSpillSlotUse(StackSlot, StoreMI);
-          DEBUG(errs() << "Store:\t" << *StoreMI);
+          DEBUG(dbgs() << "Store:\t" << *StoreMI);
           VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
         }
         NextMII = llvm::next(MII);
@@ -1840,16 +1862,14 @@ private:
           RegInfo->setPhysRegUsed(Phys);
           if (MO.isDef())
             ReusedOperands.markClobbered(Phys);
-          unsigned RReg = SubIdx ? TRI->getSubReg(Phys, SubIdx) : Phys;
-          MI.getOperand(i).setReg(RReg);
-          MI.getOperand(i).setSubReg(0);
+          substitutePhysReg(MO, Phys, *TRI);
           if (VRM.isImplicitlyDefined(VirtReg))
             // FIXME: Is this needed?
             BuildMI(MBB, &MI, MI.getDebugLoc(),
-                    TII->get(TargetInstrInfo::IMPLICIT_DEF), RReg);
+                    TII->get(TargetInstrInfo::IMPLICIT_DEF), Phys);
           continue;
         }
-        
+
         // This virtual register is now known to be a spilled value.
         if (!MO.isUse())
           continue;  // Handle defs in the loop below (handle use&def here though)
@@ -1908,11 +1928,11 @@ private:
           if (CanReuse) {
             // If this stack slot value is already available, reuse it!
             if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(errs() << "Reusing RM#"
+              DEBUG(dbgs() << "Reusing RM#"
                            << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
             else
-              DEBUG(errs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(errs() << " from physreg "
+              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+            DEBUG(dbgs() << " from physreg "
                          << TRI->getName(PhysReg) << " for vreg"
                          << VirtReg <<" instead of reloading into physreg "
                          << TRI->getName(VRM.getPhys(VirtReg)) << '\n');
@@ -1991,11 +2011,11 @@ private:
           if (DesignatedReg == PhysReg) {
             // If this stack slot value is already available, reuse it!
             if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
-              DEBUG(errs() << "Reusing RM#"
+              DEBUG(dbgs() << "Reusing RM#"
                     << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
             else
-              DEBUG(errs() << "Reusing SS#" << ReuseSlot);
-            DEBUG(errs() << " from physreg " << TRI->getName(PhysReg)
+              DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+            DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
                          << " for vreg" << VirtReg
                          << " instead of reloading into same physreg.\n");
             unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
@@ -2029,7 +2049,7 @@ private:
             SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
           MI.getOperand(i).setReg(RReg);
           MI.getOperand(i).setSubReg(0);
-          DEBUG(errs() << '\t' << *prior(MII));
+          DEBUG(dbgs() << '\t' << *prior(MII));
           ++NumReused;
           continue;
         } // if (PhysReg)
@@ -2082,7 +2102,7 @@ private:
           }
 
           UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
-          DEBUG(errs() << '\t' << *prior(InsertLoc));
+          DEBUG(dbgs() << '\t' << *prior(InsertLoc));
         }
         unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
         MI.getOperand(i).setReg(RReg);
@@ -2096,7 +2116,7 @@ private:
         int PDSSlot = PotentialDeadStoreSlots[j];
         MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
         if (DeadStore) {
-          DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
+          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
           InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
           VRM.RemoveMachineInstrFromMaps(DeadStore);
           MBB.erase(DeadStore);
@@ -2106,7 +2126,7 @@ private:
       }
 
 
-      DEBUG(errs() << '\t' << MI);
+      DEBUG(dbgs() << '\t' << MI);
 
 
       // If we have folded references to memory operands, make sure we clear all
@@ -2116,7 +2136,7 @@ private:
       for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ) {
         unsigned VirtReg = I->second.first;
         VirtRegMap::ModRef MR = I->second.second;
-        DEBUG(errs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
+        DEBUG(dbgs() << "Folded vreg: " << VirtReg << "  MR: " << MR);
 
         // MI2VirtMap be can updated which invalidate the iterator.
         // Increment the iterator first.
@@ -2125,7 +2145,7 @@ private:
         if (SS == VirtRegMap::NO_STACK_SLOT)
           continue;
         FoldedSS.insert(SS);
-        DEBUG(errs() << " - StackSlot: " << SS << "\n");
+        DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
         
         // If this folded instruction is just a use, check to see if it's a
         // straight load from the virt reg slot.
@@ -2136,7 +2156,7 @@ private:
             // If this spill slot is available, turn it into a copy (or nothing)
             // instead of leaving it as a load!
             if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
-              DEBUG(errs() << "Promoted Load To Copy: " << MI);
+              DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
               if (DestReg != InReg) {
                 const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
                 TII->copyRegToReg(MBB, &MI, DestReg, InReg, RC, RC);
@@ -2160,7 +2180,7 @@ private:
 
                 BackTracked = true;
               } else {
-                DEBUG(errs() << "Removing now-noop copy: " << MI);
+                DEBUG(dbgs() << "Removing now-noop copy: " << MI);
                 // Unset last kill since it's being reused.
                 InvalidateKill(InReg, TRI, RegKills, KillOps);
                 Spills.disallowClobberPhysReg(InReg);
@@ -2230,7 +2250,7 @@ private:
 
           if (isDead) {  // Previous store is dead.
             // If we get here, the store is dead, nuke it now.
-            DEBUG(errs() << "Removed dead store:\t" << *DeadStore);
+            DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
             InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
             VRM.RemoveMachineInstrFromMaps(DeadStore);
             MBB.erase(DeadStore);
@@ -2301,7 +2321,7 @@ private:
           if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
               !MI.findRegisterUseOperand(Src)->isUndef()) {
             ++NumDCE;
-            DEBUG(errs() << "Removing now-noop copy: " << MI);
+            DEBUG(dbgs() << "Removing now-noop copy: " << MI);
             SmallVector<unsigned, 2> KillRegs;
             InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
             if (MO.isDead() && !KillRegs.empty()) {
@@ -2389,7 +2409,7 @@ private:
             unsigned Src, Dst, SrcSR, DstSR;
             if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
               ++NumDCE;
-              DEBUG(errs() << "Removing now-noop copy: " << MI);
+              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
               InvalidateKills(MI, TRI, RegKills, KillOps);
               VRM.RemoveMachineInstrFromMaps(&MI);
               MBB.erase(&MI);