130 files changed, 5914 insertions, 2261 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 8bf1b7c..08dc07c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -93,7 +93,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
 FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM);
 FunctionPass *createARMCodePrinterPass(raw_ostream &O,
                                        ARMBaseTargetMachine &TM,
-                                       CodeGenOpt::Level OptLevel,
                                        bool Verbose);
 FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 005eb7a..15c9ec1 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -305,7 +305,7 @@ namespace ARM_AM {
   ///     abcdefgh abcdefgh abcdefgh abcdefgh    control = 3
   /// Return -1 if none of the above apply.
   /// See ARM Reference Manual A6.3.2.
-  static inline int getT2SOImmValSplat (unsigned V) {
+  static inline int getT2SOImmValSplat(unsigned V) {
     unsigned u, Vs, Imm;
     // control = 0
     if ((V & 0xffffff00) == 0) 
@@ -459,13 +459,13 @@ namespace ARM_AM {
   //
   // addrmode5 := reg +/- imm8*4
   //
-  // The first operand is always a Reg.  The third field encodes the operation
-  // in bit 8, the immediate in bits 0-7.
+  // The first operand is always a Reg.  The second operand encodes the
+  // operation in bit 8 and the immediate in bits 0-7.
   //
-  // This can also be used for FP load/store multiple ops. The third field encodes
-  // writeback mode in bit 8, the number of registers (or 2 times the number of
-  // registers for DPR ops) in bits 0-7. In addition, bit 9-11 encodes one of the
-  // following two sub-modes:
+  // This is also used for FP load/store multiple ops. The second operand
+  // encodes the writeback mode in bit 8 and the number of registers (or 2
+  // times the number of registers for DPR ops) in bits 0-7. In addition,
+  // bits 9-11 encode one of the following two sub-modes:
   //
   //    IA - Increment after
   //    DB - Decrement before
@@ -496,7 +496,29 @@ namespace ARM_AM {
   static inline bool getAM5WBFlag(unsigned AM5Opc) {
     return ((AM5Opc >> 8) & 1);
   }
-  
+
+  //===--------------------------------------------------------------------===//
+  // Addressing Mode #6
+  //===--------------------------------------------------------------------===//
+  //
+  // This is used for NEON load / store instructions.
+  //
+  // addrmode6 := reg with optional writeback
+  //
+  // This is stored in three operands [regaddr, regupdate, opc].  The first is
+  // the address register.  The second register holds the value of a post-access
+  // increment for writeback or reg0 if no writeback or if the writeback
+  // increment is the size of the memory access.  The third operand encodes
+  // whether there is writeback to the address register.
+
+  static inline unsigned getAM6Opc(bool WB = false) {
+    return (int)WB;
+  }
+
+  static inline bool getAM6WBFlag(unsigned Mode) {
+    return Mode & 1;
+  }
+
 } // end namespace ARM_AM
 } // end namespace llvm
 
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 8424c2e..f295761 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -1155,16 +1155,17 @@ void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) {
   const TargetInstrDesc &TID = MI.getDesc();
 
   // Handle jump tables.
-  if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+  if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd ||
+      TID.Opcode == ARM::t2BR_JTr || TID.Opcode == ARM::t2BR_JTadd) {
     // First emit a ldr pc, [] instruction.
     emitDataProcessingInstruction(MI, ARM::PC);
 
     // Then emit the inline jump table.
-    unsigned JTIndex = (TID.Opcode == ARM::BR_JTr)
+    unsigned JTIndex = (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::t2BR_JTr)
       ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
     emitInlineJumpTable(JTIndex);
     return;
-  } else if (TID.Opcode == ARM::BR_JTm) {
+  } else if (TID.Opcode == ARM::BR_JTm || TID.Opcode == ARM::t2BR_JTm) {
     // First emit a ldr pc, [] instruction.
     emitLoadStoreInstruction(MI, ARM::PC);
 
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index db723fe..9fedaa4 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -124,6 +124,7 @@ namespace {
     const TargetInstrInfo *TII;
     ARMFunctionInfo *AFI;
     bool isThumb;
+    bool isThumb2;
   public:
     static char ID;
     ARMConstantIslands() : MachineFunctionPass(&ID) {}
@@ -213,6 +214,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) {
   TII = Fn.getTarget().getInstrInfo();
   AFI = Fn.getInfo<ARMFunctionInfo>();
   isThumb = AFI->isThumbFunction();
+  isThumb2 = AFI->isThumb2Function();
 
   HasFarJump = false;
 
@@ -376,6 +378,9 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
         int UOpc = Opc;
         switch (Opc) {
         case ARM::tBR_JTr:
+        case ARM::t2BR_JTr:
+        case ARM::t2BR_JTm:
+        case ARM::t2BR_JTadd:
           // A Thumb table jump may involve padding; for the offsets to
           // be right, functions containing these must be 4-byte aligned.
           AFI->setAlign(2U);
@@ -402,6 +407,16 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
           Bits = 11;
           Scale = 2;
           break;
+        case ARM::t2Bcc:
+          isCond = true;
+          UOpc = ARM::t2B;
+          Bits = 20;
+          Scale = 2;
+          break;
+        case ARM::t2B:
+          Bits = 24;
+          Scale = 2;
+          break;
         }
 
         // Record this immediate branch.
@@ -447,21 +462,25 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn,
             Bits = 8;
             Scale = 4;  // +-(offset_8*4)
             break;
-          case ARMII::AddrModeT1:
+            // addrmode6 has no immediate offset.
+          case ARMII::AddrModeT1_1:
             Bits = 5;  // +offset_5
             break;
-          case ARMII::AddrModeT2:
+          case ARMII::AddrModeT1_2:
             Bits = 5;
             Scale = 2;  // +(offset_5*2)
             break;
-          case ARMII::AddrModeT4:
+          case ARMII::AddrModeT1_4:
             Bits = 5;
             Scale = 4;  // +(offset_5*4)
             break;
-          case ARMII::AddrModeTs:
+          case ARMII::AddrModeT1_s:
             Bits = 8;
             Scale = 4;  // +(offset_8*4)
             break;
+          case ARMII::AddrModeT2_pc:
+            Bits = 12;  // +-offset_12
+            break;
           }
 
           // Remember that this is a user of a CP entry.
@@ -572,7 +591,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
   // There doesn't seem to be meaningful DebugInfo available; this doesn't
   // correspond to anything in the source.
   BuildMI(OrigBB, DebugLoc::getUnknownLoc(),
-          TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB);
+          TII->get(isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B)).addMBB(NewBB);
   NumSplit++;
 
   // Update the CFG.  All succs of OrigBB are now succs of NewBB.
@@ -716,7 +735,8 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
   MachineBasicBlock *Succ = *MBB->succ_begin();
   MachineBasicBlock *Pred = *MBB->pred_begin();
   MachineInstr *PredMI = &Pred->back();
-  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB)
+  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+      || PredMI->getOpcode() == ARM::t2B)
     return PredMI->getOperand(0).getMBB() == Succ;
   return false;
 }
@@ -748,7 +768,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
         // Thumb jump tables require padding.  They should be at the end;
         // following unconditional branches are removed by AnalyzeBranch.
         MachineInstr *ThumbJTMI = NULL;
-        if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+        if ((prior(MBB->end())->getOpcode() == ARM::tBR_JTr)
+            || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTr)
+            || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTm)
+            || (prior(MBB->end())->getOpcode() == ARM::t2BR_JTadd))
           ThumbJTMI = prior(MBB->end());
         if (ThumbJTMI) {
           unsigned newMIOffset = GetOffsetOf(ThumbJTMI);
@@ -839,7 +862,16 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
 /// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
 /// the specific unconditional branch instruction.
 static inline unsigned getUnconditionalBrDisp(int Opc) {
-  return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4;
+  switch (Opc) {
+  case ARM::tB:
+    return ((1<<10)-1)*2;
+  case ARM::t2B:
+    return ((1<<23)-1)*2;
+  default:
+    break;
+  }
+  
+  return ((1<<23)-1)*4;
 }
 
 /// AcceptWater - Small amount of common code factored out of the following.
@@ -935,7 +967,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
     // range, but if the preceding conditional branch is out of range, the
     // targets will be exchanged, and the altered branch may be out of
     // range, so the machinery has to know about it.
-    int UncondBr = isThumb ? ARM::tB : ARM::B;
+    int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
     BuildMI(UserMBB, DebugLoc::getUnknownLoc(),
             TII->get(UncondBr)).addMBB(*NewMBB);
     unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
@@ -1165,7 +1197,7 @@ bool
 ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) {
   MachineInstr *MI = Br.MI;
   MachineBasicBlock *MBB = MI->getParent();
-  assert(isThumb && "Expected a Thumb function!");
+  assert(isThumb && !isThumb2 && "Expected a Thumb-1 function!");
 
   // Use BL to implement far jump.
   Br.MaxDisp = (1 << 21) * 2;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 200371b..6485fc1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -64,6 +64,8 @@ public:
 
   SDNode *Select(SDValue Op);
   virtual void InstructionSelect();
+  bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
+                               SDValue &B, SDValue &C);
   bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
                        SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode2Offset(SDValue Op, SDValue N,
@@ -74,9 +76,11 @@ public:
                              SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
                        SDValue &Offset);
+  bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
+                       SDValue &Opc);
 
   bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
-                         SDValue &Label);
+                        SDValue &Label);
 
   bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base,
                              SDValue &Offset);
@@ -92,20 +96,34 @@ public:
   bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
                              SDValue &OffImm);
 
-  bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
-                               SDValue &B, SDValue &C);
   bool SelectT2ShifterOperandReg(SDValue Op, SDValue N,
                                  SDValue &BaseReg, SDValue &Opc);
-  
+  bool SelectT2AddrModeImm12(SDValue Op, SDValue N, SDValue &Base,
+                             SDValue &OffImm);
+  bool SelectT2AddrModeImm8(SDValue Op, SDValue N, SDValue &Base,
+                            SDValue &OffImm);
+  bool SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
+                                 SDValue &OffImm);
+  bool SelectT2AddrModeImm8s4(SDValue Op, SDValue N, SDValue &Base,
+                              SDValue &OffImm);
+  bool SelectT2AddrModeSoReg(SDValue Op, SDValue N, SDValue &Base,
+                             SDValue &OffReg, SDValue &ShImm);
+
   // Include the pieces autogenerated from the target description.
 #include "ARMGenDAGISel.inc"
 
 private:
-    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
-    /// inline asm expressions.
-    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                              char ConstraintCode,
-                                              std::vector<SDValue> &OutOps);
+  /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
+  /// ARM.
+  SDNode *SelectARMIndexedLoad(SDValue Op);
+  SDNode *SelectT2IndexedLoad(SDValue Op);
+
+
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps);
 };
 }
 
@@ -116,6 +134,30 @@ void ARMDAGToDAGISel::InstructionSelect() {
   CurDAG->RemoveDeadNodes();
 }
 
+bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
+                                              SDValue N,
+                                              SDValue &BaseReg,
+                                              SDValue &ShReg,
+                                              SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+  
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShReg = CurDAG->getRegister(0, MVT::i32);
+    ShImmVal = RHS->getZExtValue() & 31;
+  } else {
+    ShReg = N.getOperand(1);
+  }
+  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
+                                  MVT::i32);
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N,
                                       SDValue &Base, SDValue &Offset,
                                       SDValue &Opc) {
@@ -382,6 +424,16 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
+                                      SDValue &Addr, SDValue &Update,
+                                      SDValue &Opc) {
+  Addr = N;
+  // The optional writeback is handled in ARMLoadStoreOpt.
+  Update = CurDAG->getRegister(0, MVT::i32);
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N,
                                         SDValue &Offset, SDValue &Label) {
   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
@@ -519,30 +571,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
-                                              SDValue N,
-                                              SDValue &BaseReg,
-                                              SDValue &ShReg,
-                                              SDValue &Opc) {
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-  
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    ShReg = CurDAG->getRegister(0, MVT::i32);
-    ShImmVal = RHS->getZExtValue() & 31;
-  } else {
-    ShReg = N.getOperand(1);
-  }
-  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
-                                  MVT::i32);
-  return true;
-}
-
 bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
                                                 SDValue &BaseReg,
                                                 SDValue &Opc) {
@@ -563,11 +591,250 @@ bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
   return false;
 }
 
+bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N,
+                                            SDValue &Base, SDValue &OffImm) {
+  // Match simple R + imm12 operands.
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    int RHSC = (int)RHS->getZExtValue();
+    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits.
+      Base   = N.getOperand(0);
+      OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue Op, SDValue N,
+                                           SDValue &Base, SDValue &OffImm) {
+  if (N.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+        Base   = N.getOperand(0);
+        OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+        return true;
+      }
+    }
+  } else if (N.getOpcode() == ISD::SUB) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
+        Base   = N.getOperand(0);
+        OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDValue Op, SDValue N,
+                                                 SDValue &OffImm){
+  unsigned Opcode = Op.getOpcode();
+  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+    ? cast<LoadSDNode>(Op)->getAddressingMode()
+    : cast<StoreSDNode>(Op)->getAddressingMode();
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N)) {
+    int RHSC = (int)RHS->getZExtValue();
+    if (RHSC >= 0 && RHSC < 0x100) { // 8 bits.
+      OffImm = (AM == ISD::PRE_INC)
+        ? CurDAG->getTargetConstant(RHSC, MVT::i32)
+        : CurDAG->getTargetConstant(-RHSC, MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDValue Op, SDValue N,
+                                             SDValue &Base, SDValue &OffImm) {
+  if (N.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (((RHSC & 0x3) == 0) && (RHSC < 0 && RHSC > -0x400)) { // 8 bits.
+        Base   = N.getOperand(0);
+        OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
+        return true;
+      }
+    }
+  } else if (N.getOpcode() == ISD::SUB) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits.
+        Base   = N.getOperand(0);
+        OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue Op, SDValue N,
+                                            SDValue &Base,
+                                            SDValue &OffReg, SDValue &ShImm) {
+  // Base only.
+  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    } else if (N.getOpcode() == ARMISD::Wrapper) {
+      Base = N.getOperand(0);
+      if (Base.getOpcode() == ISD::TargetConstantPool)
+        return false;  // We want to select t2LDRpci instead.
+    }
+    OffReg = CurDAG->getRegister(0, MVT::i32);
+    ShImm  = CurDAG->getTargetConstant(0, MVT::i32);
+    return true;
+  }
+
+  // Look for (R + R) or (R + (R << [1,2,3])).
+  unsigned ShAmt = 0;
+  Base   = N.getOperand(0);
+  OffReg = N.getOperand(1);
+
+  // Swap if it is ((R << c) + R).
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg);
+  if (ShOpcVal != ARM_AM::lsl) {
+    ShOpcVal = ARM_AM::getShiftOpcForNode(Base);
+    if (ShOpcVal == ARM_AM::lsl)
+      std::swap(Base, OffReg);
+  }  
+  
+  if (ShOpcVal == ARM_AM::lsl) {
+    // Check to see if the RHS of the shift is a constant, if not, we can't fold
+    // it.
+    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
+      ShAmt = Sh->getZExtValue();
+      if (ShAmt >= 4) {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      } else
+        OffReg = OffReg.getOperand(0);
+    } else {
+      ShOpcVal = ARM_AM::no_shift;
+    }
+  } else if (SelectT2AddrModeImm12(Op, N, Base, ShImm) ||
+             SelectT2AddrModeImm8 (Op, N, Base, ShImm))
+    // Don't match if it's possible to match to one of the r +/- imm cases.
+    return false;
+  
+  ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32);
+
+  return true;
+}
+
+//===--------------------------------------------------------------------===//
+
 /// getAL - Returns a ARMCC::AL immediate node.
 static inline SDValue getAL(SelectionDAG *CurDAG) {
   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
 }
 
+SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDValue Op) {
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+  ISD::MemIndexedMode AM = LD->getAddressingMode();
+  if (AM == ISD::UNINDEXED)
+    return NULL;
+
+  MVT LoadedVT = LD->getMemoryVT();
+  SDValue Offset, AMOpc;
+  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+  unsigned Opcode = 0;
+  bool Match = false;
+  if (LoadedVT == MVT::i32 &&
+      SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+    Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
+    Match = true;
+  } else if (LoadedVT == MVT::i16 &&
+             SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+    Match = true;
+    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
+      ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
+      : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
+  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
+    if (LD->getExtensionType() == ISD::SEXTLOAD) {
+      if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+        Match = true;
+        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
+      }
+    } else {
+      if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
+        Match = true;
+        Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
+      }
+    }
+  }
+
+  if (Match) {
+    SDValue Chain = LD->getChain();
+    SDValue Base = LD->getBasePtr();
+    SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
+                     CurDAG->getRegister(0, MVT::i32), Chain };
+    return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+                                 MVT::Other, Ops, 6);
+  }
+
+  return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) {
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+  ISD::MemIndexedMode AM = LD->getAddressingMode();
+  if (AM == ISD::UNINDEXED)
+    return NULL;
+
+  MVT LoadedVT = LD->getMemoryVT();
+  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
+  SDValue Offset;
+  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
+  unsigned Opcode = 0;
+  bool Match = false;
+  if (SelectT2AddrModeImm8Offset(Op, LD->getOffset(), Offset)) {
+    switch (LoadedVT.getSimpleVT()) {
+    case MVT::i32:
+      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
+      break;
+    case MVT::i16:
+      if (isSExtLd)
+        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
+      else
+        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
+      break;
+    case MVT::i8:
+    case MVT::i1:
+      if (isSExtLd)
+        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
+      else
+        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
+      break;
+    default:
+      return NULL;
+    }
+    Match = true;
+  }
+
+  if (Match) {
+    SDValue Chain = LD->getChain();
+    SDValue Base = LD->getBasePtr();
+    SDValue Ops[]= { Base, Offset, getAL(CurDAG),
+                     CurDAG->getRegister(0, MVT::i32), Chain };
+    return CurDAG->getTargetNode(Opcode, Op.getDebugLoc(), MVT::i32, MVT::i32,
+                                 MVT::Other, Ops, 5);
+  }
+
+  return NULL;
+}
+
 
 SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
   SDNode *N = Op.getNode();
@@ -698,47 +965,13 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5);
   }
   case ISD::LOAD: {
-    LoadSDNode *LD = cast<LoadSDNode>(Op);
-    ISD::MemIndexedMode AM = LD->getAddressingMode();
-    MVT LoadedVT = LD->getMemoryVT();
-    if (AM != ISD::UNINDEXED) {
-      SDValue Offset, AMOpc;
-      bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
-      unsigned Opcode = 0;
-      bool Match = false;
-      if (LoadedVT == MVT::i32 &&
-          SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
-        Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST;
-        Match = true;
-      } else if (LoadedVT == MVT::i16 &&
-                 SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
-        Match = true;
-        Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
-          ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
-          : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
-      } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
-        if (LD->getExtensionType() == ISD::SEXTLOAD) {
-          if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) {
-            Match = true;
-            Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
-          }
-        } else {
-          if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) {
-            Match = true;
-            Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST;
-          }
-        }
-      }
-
-      if (Match) {
-        SDValue Chain = LD->getChain();
-        SDValue Base = LD->getBasePtr();
-        SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG),
-                           CurDAG->getRegister(0, MVT::i32), Chain };
-        return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32,
-                                     MVT::Other, Ops, 6);
-      }
-    }
+    SDNode *ResNode = 0;
+    if (Subtarget->isThumb2())
+      ResNode = SelectT2IndexedLoad(Op);
+    else
+      ResNode = SelectARMIndexedLoad(Op);
+    if (ResNode)
+      return ResNode;
     // Other cases are autogenerated.
     break;
   }
@@ -751,7 +984,12 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
     // Pattern complexity = 6  cost = 1  size = 0
 
-    unsigned Opc = Subtarget->isThumb() ? ARM::tBcc : ARM::Bcc;
+    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
+    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
+    // Pattern complexity = 6  cost = 1  size = 0
+
+    unsigned Opc = Subtarget->isThumb() ? 
+      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
     SDValue Chain = Op.getOperand(0);
     SDValue N1 = Op.getOperand(1);
     SDValue N2 = Op.getOperand(2);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c24bb2e..41c9ecc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -231,16 +231,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 
   // ARM supports all 4 flavors of integer indexed load / store.
-  for (unsigned im = (unsigned)ISD::PRE_INC;
-       im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
-    setIndexedLoadAction(im,  MVT::i1,  Legal);
-    setIndexedLoadAction(im,  MVT::i8,  Legal);
-    setIndexedLoadAction(im,  MVT::i16, Legal);
-    setIndexedLoadAction(im,  MVT::i32, Legal);
-    setIndexedStoreAction(im, MVT::i1,  Legal);
-    setIndexedStoreAction(im, MVT::i8,  Legal);
-    setIndexedStoreAction(im, MVT::i16, Legal);
-    setIndexedStoreAction(im, MVT::i32, Legal);
+  if (!Subtarget->isThumb1Only()) {
+    for (unsigned im = (unsigned)ISD::PRE_INC;
+         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+      setIndexedLoadAction(im,  MVT::i1,  Legal);
+      setIndexedLoadAction(im,  MVT::i8,  Legal);
+      setIndexedLoadAction(im,  MVT::i16, Legal);
+      setIndexedLoadAction(im,  MVT::i32, Legal);
+      setIndexedStoreAction(im, MVT::i1,  Legal);
+      setIndexedStoreAction(im, MVT::i8,  Legal);
+      setIndexedStoreAction(im, MVT::i16, Legal);
+      setIndexedStoreAction(im, MVT::i32, Legal);
+    }
   }
 
   // i64 operation support.
@@ -301,7 +303,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
   setOperationAction(ISD::MEMBARRIER,         MVT::Other, Expand);
 
-  if (!Subtarget->hasV6Ops()) {
+  if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
   }
@@ -402,7 +404,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
   case ARMISD::CMP:           return "ARMISD::CMP";
-  case ARMISD::CMPNZ:         return "ARMISD::CMPNZ";
+  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
@@ -455,6 +457,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
+  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
+}
+
 //===----------------------------------------------------------------------===//
 // Lowering Code
 //===----------------------------------------------------------------------===//
@@ -1152,7 +1159,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   // FIXME: is there useful debug info available here?
   std::pair<SDValue, SDValue> CallResult =
     LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false,
-                CallingConv::C, false,
+                0, CallingConv::C, false,
                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
   return CallResult.first;
 }
@@ -1592,10 +1599,8 @@ static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     break;
   case ARMCC::EQ:
   case ARMCC::NE:
-  case ARMCC::MI:
-  case ARMCC::PL:
-    // Uses only N and Z Flags
-    CompareType = ARMISD::CMPNZ;
+    // Uses only Z Flag
+    CompareType = ARMISD::CMPZ;
     break;
   }
   ARMCC = DAG.getConstant(CondCode, MVT::i32);
@@ -2920,10 +2925,10 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   return true;
 }
 
-static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
-                                   bool isSEXTLoad, SDValue &Base,
-                                   SDValue &Offset, bool &isInc,
-                                   SelectionDAG &DAG) {
+static bool getARMIndexedAddressParts(SDNode *Ptr, MVT VT,
+                                      bool isSEXTLoad, SDValue &Base,
+                                      SDValue &Offset, bool &isInc,
+                                      SelectionDAG &DAG) {
   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
     return false;
 
@@ -2933,6 +2938,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
       int RHSC = (int)RHS->getZExtValue();
       if (RHSC < 0 && RHSC > -256) {
+        assert(Ptr->getOpcode() == ISD::ADD);
         isInc = false;
         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
         return true;
@@ -2946,6 +2952,7 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
       int RHSC = (int)RHS->getZExtValue();
       if (RHSC < 0 && RHSC > -0x1000) {
+        assert(Ptr->getOpcode() == ISD::ADD);
         isInc = false;
         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
         Base = Ptr->getOperand(0);
@@ -2976,6 +2983,31 @@ static bool getIndexedAddressParts(SDNode *Ptr, MVT VT,
   return false;
 }
 
+static bool getT2IndexedAddressParts(SDNode *Ptr, MVT VT,
+                                     bool isSEXTLoad, SDValue &Base,
+                                     SDValue &Offset, bool &isInc,
+                                     SelectionDAG &DAG) {
+  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+    return false;
+
+  Base = Ptr->getOperand(0);
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+    int RHSC = (int)RHS->getZExtValue();
+    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+      assert(Ptr->getOpcode() == ISD::ADD);
+      isInc = false;
+      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+      return true;
+    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+      isInc = Ptr->getOpcode() == ISD::ADD;
+      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// getPreIndexedAddressParts - returns true by value, base pointer and
 /// offset pointer and addressing mode by reference if the node's address
 /// can be legally represented as pre-indexed load / store address.
@@ -2984,7 +3016,7 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                              SDValue &Offset,
                                              ISD::MemIndexedMode &AM,
                                              SelectionDAG &DAG) const {
-  if (Subtarget->isThumb())
+  if (Subtarget->isThumb1Only())
     return false;
 
   MVT VT;
@@ -3001,13 +3033,18 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
     return false;
 
   bool isInc;
-  bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset,
-                                        isInc, DAG);
-  if (isLegal) {
-    AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
-    return true;
-  }
-  return false;
+  bool isLegal = false;
+  if (Subtarget->isThumb2())
+    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+                                       Offset, isInc, DAG);
+  else 
+    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+                                        Offset, isInc, DAG);
+  if (!isLegal)
+    return false;
+
+  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+  return true;
 }
 
 /// getPostIndexedAddressParts - returns true by value, base pointer and
@@ -3018,7 +3055,7 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
-  if (Subtarget->isThumb())
+  if (Subtarget->isThumb1Only())
     return false;
 
   MVT VT;
@@ -3033,13 +3070,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
     return false;
 
   bool isInc;
-  bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+  bool isLegal = false;
+  if (Subtarget->isThumb2())
+    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                         isInc, DAG);
-  if (isLegal) {
-    AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
-    return true;
-  }
-  return false;
+  else 
+    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                        isInc, DAG);
+  if (!isLegal)
+    return false;
+
+  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+  return true;
 }
 
 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 631e37f..553a86d 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -45,7 +45,7 @@ namespace llvm {
       PIC_ADD,      // Add with a PC operand and a PIC label.
 
       CMP,          // ARM compare instructions.
-      CMPNZ,        // ARM compare that uses only N or Z flags.
+      CMPZ,         // ARM compare that sets only Z flag.
       CMPFP,        // ARM VFP compare instruction, sets FPSCR.
       CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
       FMSTAT,       // ARM fmstat instruction.
@@ -197,6 +197,9 @@ namespace llvm {
       return Subtarget;
     }
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
   private:
     /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     /// make the right decision when generating code for different targets.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d7371b0..301a6c1 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -59,6 +59,49 @@ def NEONDupFrm    : Format<27>;
 class UnaryDP  { bit isUnaryDataProc = 1; }
 
 //===----------------------------------------------------------------------===//
+// ARM Instruction flags.  These need to match ARMInstrInfo.h.
+//
+
+// Addressing mode.
+class AddrMode<bits<4> val> {
+  bits<4> Value = val;
+}
+def AddrModeNone  : AddrMode<0>;
+def AddrMode1     : AddrMode<1>;
+def AddrMode2     : AddrMode<2>;
+def AddrMode3     : AddrMode<3>;
+def AddrMode4     : AddrMode<4>;
+def AddrMode5     : AddrMode<5>;
+def AddrMode6     : AddrMode<6>;
+def AddrModeT1_1  : AddrMode<7>;
+def AddrModeT1_2  : AddrMode<8>;
+def AddrModeT1_4  : AddrMode<9>;
+def AddrModeT1_s  : AddrMode<10>;
+def AddrModeT2_i12: AddrMode<12>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+  bits<3> Value = val;
+}
+def SizeInvalid  : SizeFlagVal<0>;  // Unset.
+def SizeSpecial  : SizeFlagVal<1>;  // Pseudo or special.
+def Size8Bytes   : SizeFlagVal<2>;
+def Size4Bytes   : SizeFlagVal<3>;
+def Size2Bytes   : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+  bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre  : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+
+//===----------------------------------------------------------------------===//
 
 // ARM Instruction templates.
 //
@@ -706,7 +749,6 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
 // Thumb Instruction Format Definitions.
 //
 
-
 // TI - Thumb instruction.
 
 class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
@@ -721,18 +763,6 @@ class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
 
 class TI<dag outs, dag ins, string asm, list<dag> pattern>
   : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
-class TI1<dag outs, dag ins, string asm, list<dag> pattern>
-  : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>;
-class TI2<dag outs, dag ins, string asm, list<dag> pattern>
-  : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>;
-class TI4<dag outs, dag ins, string asm, list<dag> pattern>
-  : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>;
-class TIs<dag outs, dag ins, string asm, list<dag> pattern>
-  : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>;
-
-// Two-address instructions
-class TIt<dag outs, dag ins, string asm, list<dag> pattern>
-  : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
 
 // BL, BLX(1) are translated by assembler into two instructions
 class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
@@ -764,6 +794,18 @@ class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
 
 class T1I<dag outs, dag ins, string asm, list<dag> pattern>
   : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+class T1I1<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeT1_1, Size2Bytes, asm, "", pattern>;
+class T1I2<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeT1_2, Size2Bytes, asm, "", pattern>;
+class T1I4<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeT1_4, Size2Bytes, asm, "", pattern>;
+class T1Is<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeT1_s, Size2Bytes, asm, "", pattern>;
+class T1Ix2<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+class T1JTI<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
 
 // Two-address instructions
 class T1It<dag outs, dag ins, string asm, list<dag> pattern>
@@ -781,7 +823,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = !con(iops, (ops pred:$p));
   let AsmString = !strconcat(opc, !strconcat("${p}", asm));
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb, HasThumb2];
+  list<Predicate> Predicates = [IsThumb2];
 }
 
 // Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
@@ -796,7 +838,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
   let AsmString   = !strconcat(opc, !strconcat("${s}${p}", asm));
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb, HasThumb2];
+  list<Predicate> Predicates = [IsThumb2];
 }
 
 // Special cases
@@ -807,21 +849,45 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let InOperandList = iops;
   let AsmString   = asm;
   let Pattern = pattern;
-  list<Predicate> Predicates = [IsThumb, HasThumb2];
+  list<Predicate> Predicates = [IsThumb2];
 }
 
 class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, opc, asm, "", pattern>;
+class T2Ii8<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, opc, asm, "", pattern>;
+class T2Iso<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, opc, asm, "", pattern>;
+class T2Ipc<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, opc, asm, "", pattern>;
+class T2Ii8s4<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, opc, asm, "", pattern>;
 
 class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern>
   : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
 
 class T2XI<dag oops, dag iops, string asm, list<dag> pattern>
   : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, string asm, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, asm, "", pattern>;
+
+// T2Iidxldst - Thumb2 indexed load / store instructions.
+class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
+                 string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, Size4Bytes, im, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p));
+  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb2];
+}
+
 
 // T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
 class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
-  list<Predicate> Predicates = [IsThumb, HasThumb2];
+  list<Predicate> Predicates = [IsThumb2];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index d95089d..443fdc7 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -40,108 +40,24 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
 }
 
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
-  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
-    RI(*this, STI) {
+  : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) {
 }
 
 ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
-  : ARMBaseInstrInfo(STI) {
+  : ARMBaseInstrInfo(STI), RI(*this, STI) {
 }
 
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
-                               unsigned &SrcReg, unsigned &DstReg,
-                               unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
-  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
-  unsigned oc = MI.getOpcode();
-  switch (oc) {
-  default:
-    return false;
-  case ARM::FCPYS:
-  case ARM::FCPYD:
-  case ARM::VMOVD:
-  case ARM::VMOVQ:
-    SrcReg = MI.getOperand(1).getReg();
-    DstReg = MI.getOperand(0).getReg();
-    return true;
-  case ARM::MOVr:
-    assert(MI.getDesc().getNumOperands() >= 2 &&
-           MI.getOperand(0).isReg() &&
-           MI.getOperand(1).isReg() &&
-           "Invalid ARM MOV instruction");
-    SrcReg = MI.getOperand(1).getReg();
-    DstReg = MI.getOperand(0).getReg();
-    return true;
-  }
-}
-
-unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                           int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case ARM::LDR:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isReg() &&
-        MI->getOperand(3).isImm() &&
-        MI->getOperand(2).getReg() == 0 &&
-        MI->getOperand(3).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::FLDD:
-  case ARM::FLDS:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-  return 0;
-}
-
-unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                          int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case ARM::STR:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isReg() &&
-        MI->getOperand(3).isImm() &&
-        MI->getOperand(2).getReg() == 0 &&
-        MI->getOperand(3).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  case ARM::FSTD:
-  case ARM::FSTS:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-
-  return 0;
-}
-
-void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I,
-                                     unsigned DestReg,
-                                     const MachineInstr *Orig) const {
+void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator I,
+                                 unsigned DestReg,
+                                 const MachineInstr *Orig) const {
   DebugLoc dl = Orig->getDebugLoc();
   if (Orig->getOpcode() == ARM::MOVi2pieces) {
-    RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(),
-                         Orig->getOperand(2).getImm(),
-                         Orig->getOperand(3).getReg(), this, false, dl);
+    RI.emitLoadConstPool(MBB, I, this, dl,
+                         DestReg,
+                         Orig->getOperand(1).getImm(),
+                         (ARMCC::CondCodes)Orig->getOperand(2).getImm(),
+                         Orig->getOperand(3).getReg());
     return;
   }
 
@@ -334,10 +250,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 
 // Branch analysis.
 bool
-  ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
-                                  MachineBasicBlock *&FBB,
-                                  SmallVectorImpl<MachineOperand> &Cond,
-                                  bool AllowModify) const {
+ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+                                MachineBasicBlock *&FBB,
+                                SmallVectorImpl<MachineOperand> &Cond,
+                                bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -349,11 +265,11 @@ bool
   // If there is only one terminator instruction, process it.
   unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
-    if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+    if (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B) {
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     }
-    if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+    if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc || LastOpc == ARM::t2Bcc) {
       // Block ends with fall-through condbranch.
       TBB = LastInst->getOperand(0).getMBB();
       Cond.push_back(LastInst->getOperand(1));
@@ -370,10 +286,12 @@ bool
   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
 
-  // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it.
+  // If the block ends with ARM::B/ARM::tB/ARM::t2B and a 
+  // ARM::Bcc/ARM::tBcc/ARM::t2Bcc, handle it.
   unsigned SecondLastOpc = SecondLastInst->getOpcode();
   if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
-      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB) ||
+      (SecondLastOpc == ARM::t2Bcc && LastOpc == ARM::t2B)) {
     TBB =  SecondLastInst->getOperand(0).getMBB();
     Cond.push_back(SecondLastInst->getOperand(1));
     Cond.push_back(SecondLastInst->getOperand(2));
@@ -383,8 +301,9 @@ bool
 
   // If the block ends with two unconditional branches, handle it.  The second
   // one is not executed, so remove it.
-  if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
-      (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+  if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB || 
+       SecondLastOpc==ARM::t2B) &&
+      (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) {
     TBB = SecondLastInst->getOperand(0).getMBB();
     I = LastInst;
     if (AllowModify)
@@ -396,8 +315,10 @@ bool
   // branch. The branch folder can create these, and we must get rid of them for
   // correctness of Thumb constant islands.
   if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm ||
-       SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) &&
-      (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+       SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr ||
+       SecondLastOpc == ARM::t2BR_JTr || SecondLastOpc==ARM::t2BR_JTm ||
+       SecondLastOpc == ARM::t2BR_JTadd) &&
+      (LastOpc == ARM::B || LastOpc == ARM::tB || LastOpc == ARM::t2B)) {
     I = LastInst;
     if (AllowModify)
       I->eraseFromParent();
@@ -412,8 +333,10 @@ bool
 unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
-  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+  int BOpc   = AFI->isThumbFunction() ? 
+    (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? 
+    (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
 
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin()) return 0;
@@ -444,8 +367,10 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   DebugLoc dl = DebugLoc::getUnknownLoc();
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
-  int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc;
+  int BOpc   = AFI->isThumbFunction() ? 
+    (AFI->isThumb2Function() ? ARM::t2B : ARM::tB) : ARM::B;
+  int BccOpc = AFI->isThumbFunction() ? 
+    (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
 
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
@@ -468,11 +393,288 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   return 2;
 }
 
-bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator I,
-                                unsigned DestReg, unsigned SrcReg,
-                                const TargetRegisterClass *DestRC,
-                                const TargetRegisterClass *SrcRC) const {
+bool
+ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+  if (MBB.empty()) return false;
+
+  switch (MBB.back().getOpcode()) {
+  case ARM::BX_RET:   // Return.
+  case ARM::LDM_RET:
+  case ARM::tBX_RET:
+  case ARM::tBX_RET_vararg:
+  case ARM::tPOP_RET:
+  case ARM::B:
+  case ARM::tB:
+  case ARM::t2B:      // Uncond branch.
+  case ARM::tBR_JTr:
+  case ARM::t2BR_JTr:
+  case ARM::BR_JTr:   // Jumptable branch.
+  case ARM::t2BR_JTm:
+  case ARM::BR_JTm:   // Jumptable branch through mem.
+  case ARM::t2BR_JTadd:
+  case ARM::BR_JTadd: // Jumptable branch add to pc.
+    return true;
+  default: return false;
+  }
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+  return false;
+}
+
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+  int PIdx = MI->findFirstPredOperandIdx();
+  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+                     const SmallVectorImpl<MachineOperand> &Pred) const {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B) {
+    MI->setDesc(get((Opc == ARM::B) ? ARM::Bcc :
+                    ((Opc == ARM::tB) ? ARM::tBcc : ARM::t2Bcc)));
+    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+    return true;
+  }
+
+  int PIdx = MI->findFirstPredOperandIdx();
+  if (PIdx != -1) {
+    MachineOperand &PMO = MI->getOperand(PIdx);
+    PMO.setImm(Pred[0].getImm());
+    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+    return true;
+  }
+  return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                  const SmallVectorImpl<MachineOperand> &Pred2) const {
+  if (Pred1.size() > 2 || Pred2.size() > 2)
+    return false;
+
+  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+  if (CC1 == CC2)
+    return true;
+
+  switch (CC1) {
+  default:
+    return false;
+  case ARMCC::AL:
+    return true;
+  case ARMCC::HS:
+    return CC2 == ARMCC::HI;
+  case ARMCC::LS:
+    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+  case ARMCC::GE:
+    return CC2 == ARMCC::GT;
+  case ARMCC::LE:
+    return CC2 == ARMCC::LT;
+  }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+                                    std::vector<MachineOperand> &Pred) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+    return false;
+
+  bool Found = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+      Pred.push_back(MO);
+      Found = true;
+    }
+  }
+
+  return Found;
+}
+
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) DISABLE_INLINE;
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+                                unsigned JTI) {
+  return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+  const MachineBasicBlock &MBB = *MI->getParent();
+  const MachineFunction *MF = MBB.getParent();
+  const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
+
+  // Basic size info comes from the TSFlags field.
+  const TargetInstrDesc &TID = MI->getDesc();
+  unsigned TSFlags = TID.TSFlags;
+
+  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+  default: {
+    // If this machine instr is an inline asm, measure it.
+    if (MI->getOpcode() == ARM::INLINEASM)
+      return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
+    if (MI->isLabel())
+      return 0;
+    switch (MI->getOpcode()) {
+    default:
+      assert(0 && "Unknown or unset size field for instr!");
+      break;
+    case TargetInstrInfo::IMPLICIT_DEF:
+    case TargetInstrInfo::DECLARE:
+    case TargetInstrInfo::DBG_LABEL:
+    case TargetInstrInfo::EH_LABEL:
+      return 0;
+    }
+    break;
+  }
+  case ARMII::Size8Bytes: return 8;          // Arm instruction x 2.
+  case ARMII::Size4Bytes: return 4;          // Arm instruction.
+  case ARMII::Size2Bytes: return 2;          // Thumb instruction.
+  case ARMII::SizeSpecial: {
+    switch (MI->getOpcode()) {
+    case ARM::CONSTPOOL_ENTRY:
+      // If this machine instr is a constant pool entry, its size is recorded as
+      // operand #2.
+      return MI->getOperand(2).getImm();
+    case ARM::Int_eh_sjlj_setjmp: return 12;
+    case ARM::BR_JTr:
+    case ARM::BR_JTm:
+    case ARM::BR_JTadd:
+    case ARM::t2BR_JTr:
+    case ARM::t2BR_JTm:
+    case ARM::t2BR_JTadd:
+    case ARM::tBR_JTr: {
+      // These are jumptable branches, i.e. a branch followed by an inlined
+      // jumptable. The size is 4 + 4 * number of entries.
+      unsigned NumOps = TID.getNumOperands();
+      MachineOperand JTOP =
+        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+      unsigned JTI = JTOP.getIndex();
+      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+      assert(JTI < JT.size());
+      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+      // 4 aligned. The assembler / linker may add 2 byte padding just before
+      // the JT entries.  The size does not include this padding; the
+      // constant islands pass does separate bookkeeping for it.
+      // FIXME: If we know the size of the function is less than (1 << 16) *2
+      // bytes, we can use 16-bit entries instead. Then there won't be an
+      // alignment issue.
+      return getNumJTEntries(JT, JTI) * 4 +
+        ((MI->getOpcode()==ARM::tBR_JTr) ? 2 : 4);
+    }
+    default:
+      // Otherwise, pseudo-instruction sizes are zero.
+      return 0;
+    }
+  }
+  }
+  return 0; // Not reached
+}
+
+/// Return true if the instruction is a register to register move and
+/// leave the source and dest operands in the passed parameters.
+///
+bool
+ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
+                              unsigned &SrcReg, unsigned &DstReg,
+                              unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  unsigned oc = MI.getOpcode();
+  switch (oc) {
+  default:
+    return false;
+  case ARM::FCPYS:
+  case ARM::FCPYD:
+  case ARM::VMOVD:
+  case ARM::VMOVQ:
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  case ARM::MOVr:
+    assert(MI.getDesc().getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "Invalid ARM MOV instruction");
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+}
+
+unsigned 
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::LDR:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImm() &&
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::FLDD:
+  case ARM::FLDS:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+unsigned
+ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                     int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::STR:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isReg() &&
+        MI->getOperand(3).isImm() &&
+        MI->getOperand(2).getReg() == 0 &&
+        MI->getOperand(3).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  case ARM::FSTD:
+  case ARM::FSTS:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+
+  return 0;
+}
+
+bool
+ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator I,
+                               unsigned DestReg, unsigned SrcReg,
+                               const TargetRegisterClass *DestRC,
+                               const TargetRegisterClass *SrcRC) const {
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -498,7 +700,7 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   return true;
 }
 
-void ARMInstrInfo::
+void ARMBaseInstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC) const {
@@ -521,11 +723,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
-void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
-                                  bool isKill,
-                                  SmallVectorImpl<MachineOperand> &Addr,
-                                  const TargetRegisterClass *RC,
-                                  SmallVectorImpl<MachineInstr*> &NewMIs) const{
+void 
+ARMBaseInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                 bool isKill,
+                                 SmallVectorImpl<MachineOperand> &Addr,
+                                 const TargetRegisterClass *RC,
+                                 SmallVectorImpl<MachineInstr*> &NewMIs) const{
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
@@ -546,7 +749,7 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   return;
 }
 
-void ARMInstrInfo::
+void ARMBaseInstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC) const {
@@ -566,7 +769,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
-void ARMInstrInfo::
+void ARMBaseInstrInfo::
 loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                 SmallVectorImpl<MachineOperand> &Addr,
                 const TargetRegisterClass *RC,
@@ -590,7 +793,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
-MachineInstr *ARMInstrInfo::
+MachineInstr *ARMBaseInstrInfo::
 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                       const SmallVectorImpl<unsigned> &Ops, int FI) const {
   if (Ops.size() != 1) return NULL;
@@ -609,14 +812,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     if (OpNum == 0) { // move -> store
       unsigned SrcReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
-        .addReg(SrcReg, getKillRegState(isKill))
+        .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
         .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
     } else {          // move -> load
       unsigned DstReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
-        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addReg(DstReg,
+                RegState::Define |
+                getDeadRegState(isDead) |
+                getUndefRegState(isUndef))
         .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
     }
     break;
@@ -626,14 +834,22 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     unsigned PredReg = MI->getOperand(3).getReg();
     if (OpNum == 0) { // move -> store
       unsigned SrcReg = MI->getOperand(1).getReg();
+      bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
-        .addReg(SrcReg).addFrameIndex(FI)
+        .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+        .addFrameIndex(FI)
         .addImm(0).addImm(Pred).addReg(PredReg);
     } else {          // move -> load
       unsigned DstReg = MI->getOperand(0).getReg();
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg)
-        .addFrameIndex(FI)
-        .addImm(0).addImm(Pred).addReg(PredReg);
+      bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
+        .addReg(DstReg,
+                RegState::Define |
+                getDeadRegState(isDead) |
+                getUndefRegState(isUndef))
+        .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
     break;
   }
@@ -643,14 +859,19 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     if (OpNum == 0) { // move -> store
       unsigned SrcReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
-        .addReg(SrcReg, getKillRegState(isKill))
+        .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     } else {          // move -> load
       unsigned DstReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
-        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addReg(DstReg,
+                RegState::Define |
+                getDeadRegState(isDead) |
+                getUndefRegState(isUndef))
         .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
     }
     break;
@@ -660,35 +881,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
   return NewMI;
 }
 
-bool ARMBaseInstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
-                     const SmallVectorImpl<unsigned> &Ops) const {
+MachineInstr* 
+ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+                                        MachineInstr* MI,
+                                        const SmallVectorImpl<unsigned> &Ops,
+                                        MachineInstr* LoadMI) const {
+  return 0;
+}
+
+bool
+ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
+                                       const SmallVectorImpl<unsigned> &Ops) const {
   if (Ops.size() != 1) return false;
 
-  unsigned OpNum = Ops[0];
   unsigned Opc = MI->getOpcode();
   switch (Opc) {
   default: break;
   case ARM::MOVr:
     // If it is updating CPSR, then it cannot be folded.
     return MI->getOperand(4).getReg() != ARM::CPSR;
-  case ARM::tMOVr:
-  case ARM::tMOVlor2hir:
-  case ARM::tMOVhir2lor:
-  case ARM::tMOVhir2hir: {
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
-        // tSpill cannot take a high register operand.
-        return false;
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
-        // tRestore cannot target a high register operand.
-        return false;
-    }
-    return true;
-  }
   case ARM::FCPYS:
   case ARM::FCPYD:
     return true;
@@ -700,183 +911,3 @@ canFoldMemoryOperand(const MachineInstr *MI,
 
   return false;
 }
-
-bool
-  ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case ARM::BX_RET:   // Return.
-  case ARM::LDM_RET:
-  case ARM::tBX_RET:
-  case ARM::tBX_RET_vararg:
-  case ARM::tPOP_RET:
-  case ARM::B:
-  case ARM::tB:       // Uncond branch.
-  case ARM::tBR_JTr:
-  case ARM::BR_JTr:   // Jumptable branch.
-  case ARM::BR_JTm:   // Jumptable branch through mem.
-  case ARM::BR_JTadd: // Jumptable branch add to pc.
-    return true;
-  default: return false;
-  }
-}
-
-bool ARMBaseInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
-  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
-  return false;
-}
-
-bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
-  int PIdx = MI->findFirstPredOperandIdx();
-  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
-}
-
-bool ARMBaseInstrInfo::
-PredicateInstruction(MachineInstr *MI,
-                     const SmallVectorImpl<MachineOperand> &Pred) const {
-  unsigned Opc = MI->getOpcode();
-  if (Opc == ARM::B || Opc == ARM::tB) {
-    MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
-    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
-    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
-    return true;
-  }
-
-  int PIdx = MI->findFirstPredOperandIdx();
-  if (PIdx != -1) {
-    MachineOperand &PMO = MI->getOperand(PIdx);
-    PMO.setImm(Pred[0].getImm());
-    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
-    return true;
-  }
-  return false;
-}
-
-bool ARMBaseInstrInfo::
-SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-                  const SmallVectorImpl<MachineOperand> &Pred2) const {
-  if (Pred1.size() > 2 || Pred2.size() > 2)
-    return false;
-
-  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
-  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
-  if (CC1 == CC2)
-    return true;
-
-  switch (CC1) {
-  default:
-    return false;
-  case ARMCC::AL:
-    return true;
-  case ARMCC::HS:
-    return CC2 == ARMCC::HI;
-  case ARMCC::LS:
-    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
-  case ARMCC::GE:
-    return CC2 == ARMCC::GT;
-  case ARMCC::LE:
-    return CC2 == ARMCC::LT;
-  }
-}
-
-bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
-                                    std::vector<MachineOperand> &Pred) const {
-  const TargetInstrDesc &TID = MI->getDesc();
-  if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
-    return false;
-
-  bool Found = false;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
-    if (MO.isReg() && MO.getReg() == ARM::CPSR) {
-      Pred.push_back(MO);
-      Found = true;
-    }
-  }
-
-  return Found;
-}
-
-
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
-                                unsigned JTI) DISABLE_INLINE;
-static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
-                                unsigned JTI) {
-  return JT[JTI].MBBs.size();
-}
-
-/// GetInstSize - Return the size of the specified MachineInstr.
-///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
-  const MachineBasicBlock &MBB = *MI->getParent();
-  const MachineFunction *MF = MBB.getParent();
-  const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
-
-  // Basic size info comes from the TSFlags field.
-  const TargetInstrDesc &TID = MI->getDesc();
-  unsigned TSFlags = TID.TSFlags;
-
-  switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
-  default: {
-    // If this machine instr is an inline asm, measure it.
-    if (MI->getOpcode() == ARM::INLINEASM)
-      return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName());
-    if (MI->isLabel())
-      return 0;
-    switch (MI->getOpcode()) {
-    default:
-      assert(0 && "Unknown or unset size field for instr!");
-      break;
-    case TargetInstrInfo::IMPLICIT_DEF:
-    case TargetInstrInfo::DECLARE:
-    case TargetInstrInfo::DBG_LABEL:
-    case TargetInstrInfo::EH_LABEL:
-      return 0;
-    }
-    break;
-  }
-  case ARMII::Size8Bytes: return 8;          // Arm instruction x 2.
-  case ARMII::Size4Bytes: return 4;          // Arm instruction.
-  case ARMII::Size2Bytes: return 2;          // Thumb instruction.
-  case ARMII::SizeSpecial: {
-    switch (MI->getOpcode()) {
-    case ARM::CONSTPOOL_ENTRY:
-      // If this machine instr is a constant pool entry, its size is recorded as
-      // operand #2.
-      return MI->getOperand(2).getImm();
-    case ARM::Int_eh_sjlj_setjmp: return 12;
-    case ARM::BR_JTr:
-    case ARM::BR_JTm:
-    case ARM::BR_JTadd:
-    case ARM::tBR_JTr: {
-      // These are jumptable branches, i.e. a branch followed by an inlined
-      // jumptable. The size is 4 + 4 * number of entries.
-      unsigned NumOps = TID.getNumOperands();
-      MachineOperand JTOP =
-        MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
-      unsigned JTI = JTOP.getIndex();
-      const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
-      const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
-      assert(JTI < JT.size());
-      // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
-      // 4 aligned. The assembler / linker may add 2 byte padding just before
-      // the JT entries.  The size does not include this padding; the
-      // constant islands pass does separate bookkeeping for it.
-      // FIXME: If we know the size of the function is less than (1 << 16) *2
-      // bytes, we can use 16-bit entries instead. Then there won't be an
-      // alignment issue.
-      return getNumJTEntries(JT, JTI) * 4 +
-             (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
-    }
-    default:
-      // Otherwise, pseudo-instruction sizes are zero.
-      return 0;
-    }
-  }
-  }
-  return 0; // Not reached
-}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 131960b..8c8f788 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -33,16 +33,22 @@ namespace ARMII {
     // This four-bit field describes the addressing mode used.
 
     AddrModeMask  = 0xf,
-    AddrModeNone  = 0,
-    AddrMode1     = 1,
-    AddrMode2     = 2,
-    AddrMode3     = 3,
-    AddrMode4     = 4,
-    AddrMode5     = 5,
-    AddrModeT1    = 6,
-    AddrModeT2    = 7,
-    AddrModeT4    = 8,
-    AddrModeTs    = 9,  // i8 * 4 for pc and sp relative data
+    AddrModeNone    = 0,
+    AddrMode1       = 1,
+    AddrMode2       = 2,
+    AddrMode3       = 3,
+    AddrMode4       = 4,
+    AddrMode5       = 5,
+    AddrMode6       = 6,
+    AddrModeT1_1    = 7,
+    AddrModeT1_2    = 8,
+    AddrModeT1_4    = 9,
+    AddrModeT1_s    = 10, // i8 * 4 for pc and sp relative data
+    AddrModeT2_i12  = 11,
+    AddrModeT2_i8   = 12,
+    AddrModeT2_so   = 13,
+    AddrModeT2_pc   = 14, // +/- i12 for pc relative data
+    AddrModeT2_i8s4 = 15, // i8 * 4
 
     // Size* - Flags to keep track of the size of an instruction.
     SizeShift     = 4,
@@ -147,25 +153,16 @@ namespace ARMII {
 }
 
 class ARMBaseInstrInfo : public TargetInstrInfoImpl {
-  const ARMRegisterInfo RI;
 protected:
   // Can be only subclassed.
   explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
 public:
-
-  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
-  /// such, whenever a client has an instance of instruction info, it should
-  /// always be able to get register info as well (through this method).
-  ///
-  virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; }
-
-  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                     unsigned DestReg, const MachineInstr *Orig) const;
-
   virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
+  virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
+
   // Branch analysis.
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                              MachineBasicBlock *&FBB,
@@ -176,9 +173,6 @@ public:
                                 MachineBasicBlock *FBB,
                             const SmallVectorImpl<MachineOperand> &Cond) const;
 
-  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops) const;
-
   virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -206,11 +200,6 @@ public:
   /// GetInstSize - Returns the size of the specified MachineInstr.
   ///
   virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
-};
-
-class ARMInstrInfo : public ARMBaseInstrInfo {
-public:
-  explicit ARMInstrInfo(const ARMSubtarget &STI);
 
   /// Return true if the instruction is a register to register move and return
   /// the source and dest operands and their sub-register indices by reference.
@@ -248,17 +237,33 @@ public:
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
 
+  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+  
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
-                                           const SmallVectorImpl<unsigned> &Ops,
+                                              const SmallVectorImpl<unsigned> &Ops,
                                               int FrameIndex) const;
 
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
-                                           const SmallVectorImpl<unsigned> &Ops,
-                                              MachineInstr* LoadMI) const {
-    return 0;
-  }
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const;
+};
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+  ARMRegisterInfo RI;
+public:
+  explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  const ARMRegisterInfo &getRegisterInfo() const { return RI; }
+
+  void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                     unsigned DestReg, const MachineInstr *Orig) const;
 };
 
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cb7b7b9..408f47a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -75,8 +75,8 @@ def ARMbrjt          : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
 def ARMcmp           : SDNode<"ARMISD::CMP", SDT_ARMCmp,
                               [SDNPOutFlag]>;
 
-def ARMcmpNZ         : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp,
-                              [SDNPOutFlag]>;
+def ARMcmpZ          : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
+                              [SDNPOutFlag,SDNPCommutative]>;
 
 def ARMpic_add       : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
 
@@ -99,7 +99,7 @@ def HasVFP3   : Predicate<"Subtarget->hasVFP3()">;
 def HasNEON   : Predicate<"Subtarget->hasNEON()">;
 def IsThumb   : Predicate<"Subtarget->isThumb()">;
 def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
+def IsThumb2  : Predicate<"Subtarget->isThumb2()">;
 def IsARM     : Predicate<"!Subtarget->isThumb()">;
 def IsDarwin    : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
@@ -287,6 +287,14 @@ def addrmode5 : Operand<i32>,
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
+// addrmode6 := reg with optional writeback
+//
+def addrmode6 : Operand<i32>,
+                ComplexPattern<i32, 3, "SelectAddrMode6", []> {
+  let PrintMethod = "printAddrMode6Operand";
+  let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
+}
+
 // addrmodepc := pc + reg
 //
 def addrmodepc : Operand<i32>,
@@ -309,43 +317,6 @@ def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
 }
 
 //===----------------------------------------------------------------------===//
-// ARM Instruction flags.  These need to match ARMInstrInfo.h.
-//
-
-// Addressing mode.
-class AddrMode<bits<4> val> {
-  bits<4> Value = val;
-}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1    : AddrMode<1>;
-def AddrMode2    : AddrMode<2>;
-def AddrMode3    : AddrMode<3>;
-def AddrMode4    : AddrMode<4>;
-def AddrMode5    : AddrMode<5>;
-def AddrModeT1   : AddrMode<6>;
-def AddrModeT2   : AddrMode<7>;
-def AddrModeT4   : AddrMode<8>;
-def AddrModeTs   : AddrMode<9>;
-
-// Instruction size.
-class SizeFlagVal<bits<3> val> {
-  bits<3> Value = val;
-}
-def SizeInvalid  : SizeFlagVal<0>;  // Unset.
-def SizeSpecial  : SizeFlagVal<1>;  // Pseudo or special.
-def Size8Bytes   : SizeFlagVal<2>;
-def Size4Bytes   : SizeFlagVal<3>;
-def Size2Bytes   : SizeFlagVal<4>;
-
-// Load / store index mode.
-class IndexMode<bits<2> val> {
-  bits<2> Value = val;
-}
-def IndexModeNone : IndexMode<0>;
-def IndexModePre  : IndexMode<1>;
-def IndexModePost : IndexMode<2>;
-
-//===----------------------------------------------------------------------===//
 
 include "ARMInstrFormats.td"
 
@@ -780,7 +751,7 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
 
 def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
                       (ins GPR:$base,am3offset:$offset), LdMiscFrm,
-                      "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
+                    "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>;
 }
 
 // Store
@@ -1309,19 +1280,19 @@ defm CMN  : AI1_cmp_irs<0b1011, "cmn",
 
 // Note that TST/TEQ don't set all the same flags that CMP does!
 defm TST  : AI1_cmp_irs<0b1000, "tst",
-                        BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>, 1>;
+                        BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>;
 defm TEQ  : AI1_cmp_irs<0b1001, "teq",
-                        BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>, 1>;
+                        BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>;
 
-defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
-                         BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
-defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
-                         BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+defm CMPz  : AI1_cmp_irs<0b1010, "cmp",
+                         BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
+defm CMNz  : AI1_cmp_irs<0b1011, "cmn",
+                         BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
 
 def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
              (CMNri  GPR:$src, so_imm_neg:$imm)>;
 
-def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
              (CMNri  GPR:$src, so_imm_neg:$imm)>;
 
 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 7927ca5..904d9b1 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -187,23 +187,24 @@ let isCall = 1,
 let isBranch = 1, isTerminator = 1 in {
   let isBarrier = 1 in {
     let isPredicable = 1 in
-    def tB   : TI<(outs), (ins brtarget:$target), "b $target",
-                  [(br bb:$target)]>;
+    def tB   : T1I<(outs), (ins brtarget:$target), "b $target",
+                   [(br bb:$target)]>;
 
   // Far jump
-  def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>;
+  def tBfar : T1Ix2<(outs), (ins brtarget:$target), 
+                    "bl $target\t@ far jump",[]>;
 
-  def tBR_JTr : TJTI<(outs),
-                     (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
-                     "cpy pc, $target \n\t.align\t2\n$jt",
-                     [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
+  def tBR_JTr : T1JTI<(outs),
+                      (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
+                      "cpy pc, $target \n\t.align\t2\n$jt",
+                      [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>;
   }
 }
 
 // FIXME: should be able to write a pattern for ARMBrcond, but can't use
 // a two-value operand where a dag node expects two operands. :(
 let isBranch = 1, isTerminator = 1 in
-  def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target",
+  def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target",
                  [/*(ARMbrcond bb:$target, imm:$cc)*/]>;
 
 //===----------------------------------------------------------------------===//
@@ -211,68 +212,68 @@ let isBranch = 1, isTerminator = 1 in
 //
 
 let canFoldAsLoad = 1 in
-def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
+def tLDR : T1I4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr),
                "ldr $dst, $addr",
                [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>;
 
-def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr),
+def tLDRB : T1I1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr),
                 "ldrb $dst, $addr",
                 [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>;
 
-def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr),
+def tLDRH : T1I2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr),
                 "ldrh $dst, $addr",
                 [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>;
 
-def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+def tLDRSB : T1I1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
                  "ldrsb $dst, $addr",
                  [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>;
 
-def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
+def tLDRSH : T1I2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr),
                  "ldrsh $dst, $addr",
                  [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>;
 
 let canFoldAsLoad = 1 in
-def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+def tLDRspi : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
                   "ldr $dst, $addr",
                   [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>;
 
 // Special instruction for restore. It cannot clobber condition register
 // when it's expanded by eliminateCallFramePseudoInstr().
 let canFoldAsLoad = 1, mayLoad = 1 in
-def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
+def tRestore : T1Is<(outs tGPR:$dst), (ins t_addrmode_sp:$addr),
                     "ldr $dst, $addr", []>;
 
 // Load tconstpool
 let canFoldAsLoad = 1 in
-def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+def tLDRpci : T1Is<(outs tGPR:$dst), (ins i32imm:$addr),
                   "ldr $dst, $addr",
                   [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>;
 
 // Special LDR for loads from non-pc-relative constpools.
 let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
-def tLDRcp  : TIs<(outs tGPR:$dst), (ins i32imm:$addr),
+def tLDRcp  : T1Is<(outs tGPR:$dst), (ins i32imm:$addr),
                   "ldr $dst, $addr", []>;
 
-def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr),
+def tSTR : T1I4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr),
                "str $src, $addr",
                [(store tGPR:$src, t_addrmode_s4:$addr)]>;
 
-def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr),
+def tSTRB : T1I1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr),
                  "strb $src, $addr",
                  [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>;
 
-def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr),
+def tSTRH : T1I2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr),
                  "strh $src, $addr",
                  [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>;
 
-def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+def tSTRspi : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
                    "str $src, $addr",
                    [(store tGPR:$src, t_addrmode_sp:$addr)]>;
 
 let mayStore = 1 in {
 // Special instruction for spill. It cannot clobber condition register
 // when it's expanded by eliminateCallFramePseudoInstr().
-def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
+def tSpill : T1Is<(outs), (ins tGPR:$src, t_addrmode_sp:$addr),
                   "str $src, $addr", []>;
 }
 
@@ -362,9 +363,9 @@ let Defs = [CPSR] in {
 def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
               "cmn $lhs, $rhs",
               [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
-def tCMNNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-                "cmn $lhs, $rhs",
-                [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+def tCMNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+               "cmn $lhs, $rhs",
+               [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
 }
 
 // CMP immediate
@@ -372,9 +373,9 @@ let Defs = [CPSR] in {
 def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
                "cmp $lhs, $rhs",
                [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
-def tCMPNZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
-                 "cmp $lhs, $rhs",
-                 [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>;
+def tCMPZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+                "cmp $lhs, $rhs",
+                [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>;
 
 }
 
@@ -383,9 +384,9 @@ let Defs = [CPSR] in {
 def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                "cmp $lhs, $rhs",
                [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
-def tCMPNZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-                 "cmp $lhs, $rhs",
-                 [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>;
+def tCMPZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+                "cmp $lhs, $rhs",
+                [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>;
 }
 
 // TODO: A7-37: CMP(3) - cmp hi regs
@@ -551,7 +552,7 @@ def tSXTH  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
 let isCommutable = 1, Defs = [CPSR] in
 def tTST  : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                "tst $lhs, $rhs",
-               [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
+               [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
 
 // zero-extend byte
 def tUXTB  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
@@ -622,13 +623,13 @@ def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
 def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
 
 // zextload i1 -> zextload i8
-def : TPat<(zextloadi1 t_addrmode_s1:$addr),
-           (tLDRB t_addrmode_s1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_s1:$addr),
+            (tLDRB t_addrmode_s1:$addr)>;
 
 // extload -> zextload
-def : TPat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : TPat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : TPat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
 
 // Large immediate handling.
 
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bfdf719..50345a6 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -89,7 +89,6 @@ def imm0_65535 : PatLeaf<(i32 imm), [{
   return (uint32_t)N->getZExtValue() < 65536;
 }]>;
 
-
 /// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
 /// e.g., 0xf000ffff
 def bf_inv_mask_imm : Operand<i32>,
@@ -127,6 +126,42 @@ def t2_lo16AllZero : PatLeaf<(i32 imm), [{
   }], t2_hi16>;
 
 
+// Define Thumb2 specific addressing modes.
+
+// t2addrmode_imm12  := reg + imm12
+def t2addrmode_imm12 : Operand<i32>,
+                       ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
+  let PrintMethod = "printT2AddrModeImm12Operand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_imm8  := reg - imm8
+def t2addrmode_imm8 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
+  let PrintMethod = "printT2AddrModeImm8Operand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+def t2am_imm8_offset : Operand<i32>,
+                       ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset", []>{
+  let PrintMethod = "printT2AddrModeImm8OffsetOperand";
+}
+
+// t2addrmode_imm8s4  := reg + (imm8 << 2)
+def t2addrmode_imm8s4 : Operand<i32>,
+                        ComplexPattern<i32, 2, "SelectT2AddrModeImm8s4", []> {
+  let PrintMethod = "printT2AddrModeImm8Operand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+}
+
+// t2addrmode_so_reg  := reg + reg << imm2
+def t2addrmode_so_reg : Operand<i32>,
+                        ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
+  let PrintMethod = "printT2AddrModeSoRegOperand";
+  let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+}
+
+
 //===----------------------------------------------------------------------===//
 // Multiclass helpers...
 //
@@ -239,32 +274,32 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
                  opc, " $dst, $lhs, $rhs",
                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+                 Requires<[IsThumb2, CarryDefIsUnused]>;
    // register
    def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                  opc, " $dst, $lhs, $rhs",
                  [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]> {
+                 Requires<[IsThumb2, CarryDefIsUnused]> {
      let isCommutable = Commutable;
    }
    // shifted register
    def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
                  opc, " $dst, $lhs, $rhs",
                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+                 Requires<[IsThumb2, CarryDefIsUnused]>;
    // Carry setting variants
    // shifted imm
    def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
                   !strconcat(opc, "s $dst, $lhs, $rhs"),
                   [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
-                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                  Requires<[IsThumb2, CarryDefIsUsed]> {
                     let Defs = [CPSR];
                   }
    // register
    def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                   !strconcat(opc, "s $dst, $lhs, $rhs"),
                   [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
-                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                  Requires<[IsThumb2, CarryDefIsUsed]> {
                     let Defs = [CPSR];
                     let isCommutable = Commutable;
    }
@@ -272,7 +307,7 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
                   !strconcat(opc, "s $dst, $lhs, $rhs"),
                   [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
-                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                  Requires<[IsThumb2, CarryDefIsUsed]> {
                     let Defs = [CPSR];
    }
 }
@@ -287,24 +322,24 @@ multiclass T2I_rsc_is<string opc, PatFrag opnode> {
    def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
                  opc, " $dst, $rhs, $lhs",
                  [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+                 Requires<[IsThumb2, CarryDefIsUnused]>;
    // shifted register
    def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
                  opc, " $dst, $rhs, $lhs",
                  [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+                 Requires<[IsThumb2, CarryDefIsUnused]>;
    // shifted imm
    def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
                  !strconcat(opc, "s $dst, $rhs, $lhs"),
                  [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                 Requires<[IsThumb2, CarryDefIsUsed]> {
                    let Defs = [CPSR];
    }
    // shifted register
    def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
                  !strconcat(opc, "s $dst, $rhs, $lhs"),
                  [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
-                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                 Requires<[IsThumb2, CarryDefIsUsed]> {
                    let Defs = [CPSR];
    }
 }
@@ -359,6 +394,71 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> {
 }
 }
 
+/// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns.
+multiclass T2I_ld<string opc, PatFrag opnode> {
+  def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr),
+                   opc, " $dst, $addr",
+                   [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>;
+  def i8  : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr),
+                   opc, " $dst, $addr",
+                   [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>;
+  def s   : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr),
+                   opc, " $dst, $addr",
+                   [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>;
+  def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr),
+                   opc, " $dst, $addr",
+                   [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>;
+}
+
+/// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns.
+multiclass T2I_st<string opc, PatFrag opnode> {
+  def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr),
+                   opc, " $src, $addr",
+                   [(opnode GPR:$src, t2addrmode_imm12:$addr)]>;
+  def i8  : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr),
+                   opc, " $src, $addr",
+                   [(opnode GPR:$src, t2addrmode_imm8:$addr)]>;
+  def s   : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr),
+                   opc, " $src, $addr",
+                   [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>;
+}
+
+/// T2I_picld - Defines the PIC load pattern.
+class T2I_picld<string opc, PatFrag opnode> :
+      T2I<(outs GPR:$dst), (ins addrmodepc:$addr),
+          !strconcat("${addr:label}:\n\t", opc), " $dst, $addr",
+          [(set GPR:$dst, (opnode addrmodepc:$addr))]>;
+
+/// T2I_picst - Defines the PIC store pattern.
+class T2I_picst<string opc, PatFrag opnode> :
+      T2I<(outs), (ins GPR:$src, addrmodepc:$addr),
+          !strconcat("${addr:label}:\n\t", opc), " $src, $addr",
+          [(opnode GPR:$src, addrmodepc:$addr)]>;
+
+
+/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_unary_rrot<string opc, PatFrag opnode> {
+  def r     : T2I<(outs GPR:$dst), (ins GPR:$Src),
+                  opc, " $dst, $Src",
+                 [(set GPR:$dst, (opnode GPR:$Src))]>;
+  def r_rot : T2I<(outs GPR:$dst), (ins GPR:$Src, i32imm:$rot),
+                  opc, " $dst, $Src, ror $rot",
+                 [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>;
+}
+
+/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
+/// register and one whose operand is a register rotated by 8/16/24.
+multiclass T2I_bin_rrot<string opc, PatFrag opnode> {
+  def rr     : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+                  opc, " $dst, $LHS, $RHS",
+                  [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>;
+  def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+                  opc, " $dst, $LHS, $RHS, ror $rot",
+                  [(set GPR:$dst, (opnode GPR:$LHS,
+                                          (rotr GPR:$RHS, rot_imm:$rot)))]>;
+}
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -409,6 +509,209 @@ def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
 //  Load / store Instructions.
 //
 
+// Load
+let canFoldAsLoad = 1 in
+defm t2LDR   : T2I_ld<"ldr",  UnOpFrag<(load node:$Src)>>;
+
+// Loads with zero extension
+defm t2LDRH  : T2I_ld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
+defm t2LDRB  : T2I_ld<"ldrb", UnOpFrag<(zextloadi8  node:$Src)>>;
+
+// Loads with sign extension
+defm t2LDRSH : T2I_ld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
+defm t2LDRSB : T2I_ld<"ldrsb", UnOpFrag<(sextloadi8  node:$Src)>>;
+
+let mayLoad = 1 in {
+// Load doubleword
+def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst), (ins t2addrmode_imm8s4:$addr),
+                       "ldrd", " $dst, $addr", []>;
+def t2LDRDpci : T2Ii8s4<(outs GPR:$dst), (ins i32imm:$addr),
+                       "ldrd", " $dst, $addr", []>;
+}
+
+// zextload i1 -> zextload i8
+def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
+            (t2LDRBi12  t2addrmode_imm12:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_imm8:$addr),
+            (t2LDRBi8   t2addrmode_imm8:$addr)>;
+def : T2Pat<(zextloadi1 t2addrmode_so_reg:$addr),
+            (t2LDRBs    t2addrmode_so_reg:$addr)>;
+def : T2Pat<(zextloadi1 (ARMWrapper tconstpool:$addr)),
+            (t2LDRBpci  tconstpool:$addr)>;
+
+// extload -> zextload
+// FIXME: Reduce the number of patterns by legalizing extload to zextload
+// earlier?
+def : T2Pat<(extloadi1  t2addrmode_imm12:$addr),
+            (t2LDRBi12  t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi1  t2addrmode_imm8:$addr),
+            (t2LDRBi8   t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi1  t2addrmode_so_reg:$addr),
+            (t2LDRBs    t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi1  (ARMWrapper tconstpool:$addr)),
+            (t2LDRBpci  tconstpool:$addr)>;
+
+def : T2Pat<(extloadi8  t2addrmode_imm12:$addr),
+            (t2LDRBi12  t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi8  t2addrmode_imm8:$addr),
+            (t2LDRBi8   t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi8  t2addrmode_so_reg:$addr),
+            (t2LDRBs    t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi8  (ARMWrapper tconstpool:$addr)),
+            (t2LDRBpci  tconstpool:$addr)>;
+
+def : T2Pat<(extloadi16 t2addrmode_imm12:$addr),
+            (t2LDRHi12  t2addrmode_imm12:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_imm8:$addr),
+            (t2LDRHi8   t2addrmode_imm8:$addr)>;
+def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
+            (t2LDRHs    t2addrmode_so_reg:$addr)>;
+def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
+            (t2LDRHpci  tconstpool:$addr)>;
+
+// Indexed loads
+let mayLoad = 1 in {
+def t2LDR_PRE  : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins t2addrmode_imm8:$addr),
+                            AddrModeT2_i8, IndexModePre,
+                            "ldr", " $dst, $addr!", "$addr.base = $base_wb",
+                            []>;
+
+def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                           "ldr", " $dst, [$base], $offset", "$base = $base_wb",
+                            []>;
+
+def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins t2addrmode_imm8:$addr),
+                            AddrModeT2_i8, IndexModePre,
+                            "ldrb", " $dst, $addr!", "$addr.base = $base_wb",
+                            []>;
+def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                          "ldrb", " $dst, [$base], $offset", "$base = $base_wb",
+                            []>;
+
+def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins t2addrmode_imm8:$addr),
+                            AddrModeT2_i8, IndexModePre,
+                            "ldrh", " $dst, $addr!", "$addr.base = $base_wb",
+                            []>;
+def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                          "ldrh", " $dst, [$base], $offset", "$base = $base_wb",
+                            []>;
+
+def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins t2addrmode_imm8:$addr),
+                            AddrModeT2_i8, IndexModePre,
+                            "ldrsb", " $dst, $addr!", "$addr.base = $base_wb",
+                            []>;
+def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                         "ldrsb", " $dst, [$base], $offset", "$base = $base_wb",
+                            []>;
+
+def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins t2addrmode_imm8:$addr),
+                            AddrModeT2_i8, IndexModePre,
+                            "ldrsh", " $dst, $addr!", "$addr.base = $base_wb",
+                            []>;
+def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb),
+                            (ins GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                         "ldrsh", " $dst, [$base], $offset", "$base = $base_wb",
+                            []>;
+}
+
+// Store
+defm t2STR   : T2I_st<"str",  BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB  : T2I_st<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH  : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+// Store doubleword
+let mayLoad = 1 in
+def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src, t2addrmode_imm8s4:$addr),
+                        "strd", " $src, $addr", []>;
+
+// Indexed stores
+def t2STR_PRE  : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePre,
+                          "str", " $src, [$base, $offset]!", "$base = $base_wb",
+             [(set GPR:$base_wb,
+                   (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                           "str", " $src, [$base], $offset", "$base = $base_wb",
+             [(set GPR:$base_wb,
+                   (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+def t2STRH_PRE  : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePre,
+                         "strh", " $src, [$base, $offset]!", "$base = $base_wb",
+        [(set GPR:$base_wb,
+              (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                          "strh", " $src, [$base], $offset", "$base = $base_wb",
+       [(set GPR:$base_wb,
+             (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+def t2STRB_PRE  : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePre,
+                         "strb", " $src, [$base, $offset]!", "$base = $base_wb",
+         [(set GPR:$base_wb,
+               (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb),
+                            (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset),
+                            AddrModeT2_i8, IndexModePost,
+                          "strb", " $src, [$base], $offset", "$base = $base_wb",
+        [(set GPR:$base_wb,
+              (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>;
+
+
+// Address computation and loads and stores in PIC mode.
+let isNotDuplicable = 1, AddedComplexity = 10 in {
+let canFoldAsLoad = 1 in
+def t2PICLDR   : T2I_picld<"ldr",  UnOpFrag<(load node:$Src)>>;
+
+def t2PICLDRH  : T2I_picld<"ldrh", UnOpFrag<(zextloadi16 node:$Src)>>;
+def t2PICLDRB  : T2I_picld<"ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
+def t2PICLDRSH : T2I_picld<"ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
+def t2PICLDRSB : T2I_picld<"ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
+
+def t2PICSTR   : T2I_picst<"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
+def t2PICSTRH  : T2I_picst<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+def t2PICSTRB  : T2I_picst<"strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+} // isNotDuplicable = 1, AddedComplexity = 10
+
+
+//===----------------------------------------------------------------------===//
+//  Load / store multiple Instructions.
+//
+
+let mayLoad = 1 in
+def t2LDM : T2XI<(outs),
+                 (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops),
+                 "ldm${p}${addr:submode} $addr, $dst1", []>;
+
+let mayStore = 1 in
+def t2STM : T2XI<(outs),
+                 (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops),
+                 "stm${p}${addr:submode} $addr, $src1", []>;
+
 //===----------------------------------------------------------------------===//
 //  Move Instructions.
 //
@@ -435,6 +738,40 @@ def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
                            (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
 
 //===----------------------------------------------------------------------===//
+//  Extend Instructions.
+//
+
+// Sign extenders
+
+defm t2SXTB  : T2I_unary_rrot<"sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>;
+defm t2SXTH  : T2I_unary_rrot<"sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>;
+
+defm t2SXTAB : T2I_bin_rrot<"sxtab",
+                        BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
+defm t2SXTAH : T2I_bin_rrot<"sxtah",
+                        BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+
+// TODO: SXT(A){B|H}16
+
+// Zero extenders
+
+let AddedComplexity = 16 in {
+defm t2UXTB   : T2I_unary_rrot<"uxtb"  , UnOpFrag<(and node:$Src, 0x000000FF)>>;
+defm t2UXTH   : T2I_unary_rrot<"uxth"  , UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
+defm t2UXTB16 : T2I_unary_rrot<"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
+
+def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+            (t2UXTB16r_rot GPR:$Src, 24)>;
+def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
+            (t2UXTB16r_rot GPR:$Src, 8)>;
+
+defm t2UXTAB : T2I_bin_rrot<"uxtab",
+                            BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
+defm t2UXTAH : T2I_bin_rrot<"uxtah",
+                            BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+}
+
+//===----------------------------------------------------------------------===//
 //  Arithmetic Instructions.
 //
 
@@ -563,21 +900,24 @@ def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src),
 
 defm t2CMP   : T2I_cmp_is<"cmp",
                           BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-defm t2CMPnz : T2I_cmp_is<"cmp",
-                          BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm t2CMPz : T2I_cmp_is<"cmp",
+                         BinOpFrag<(ARMcmpZ node:$LHS, node:$RHS)>>;
 
 defm t2CMN   : T2I_cmp_is<"cmn",
                           BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
-defm t2CMNnz : T2I_cmp_is<"cmn",
-                          BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+defm t2CMNz : T2I_cmp_is<"cmn",
+                         BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
 
 def : T2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
             (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
 
-def : T2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
+def : T2Pat<(ARMcmpZ  GPR:$src, t2_so_imm_neg:$imm),
             (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
 
-// FIXME: TST, TEQ, etc.
+defm t2TST  : T2I_cmp_is<"tst",
+                         BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>;
+defm t2TEQ  : T2I_cmp_is<"teq",
+                         BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
 
 // A8.6.27  CBNZ, CBZ - Compare and branch on (non)zero.
 // Short range conditional branch. Looks awesome for loops. Need to figure
@@ -585,6 +925,42 @@ def : T2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
 
 // FIXME: Conditional moves
 
+//===----------------------------------------------------------------------===//
+// Control-Flow Instructions
+//
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+let isPredicable = 1 in
+def t2B   : T2XI<(outs), (ins brtarget:$target),
+                 "b $target",
+                 [(br bb:$target)]>;
+
+let isNotDuplicable = 1, isIndirectBranch = 1 in {
+def t2BR_JTr : T2JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
+                     "mov pc, $target \n$jt",
+                     [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
+
+def t2BR_JTm : 
+    T2JTI<(outs),
+          (ins t2addrmode_so_reg:$target, jtblock_operand:$jt, i32imm:$id),
+          "ldr pc, $target \n$jt",
+          [(ARMbrjt (i32 (load t2addrmode_so_reg:$target)), tjumptable:$jt,
+             imm:$id)]>;
+
+def t2BR_JTadd : 
+    T2JTI<(outs),
+          (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
+          "add pc, $target, $idx \n$jt",
+          [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]>;
+} // isNotDuplicate, isIndirectBranch
+} // isBranch, isTerminator, isBarrier
+
+// FIXME: should be able to write a pattern for ARMBrcond, but can't use
+// a two-value operand where a dag node expects two operands. :(
+let isBranch = 1, isTerminator = 1 in
+def t2Bcc : T2I<(outs), (ins brtarget:$target), 
+                "b", " $target",
+                [/*(ARMbrcond bb:$target, imm:$cc)*/]>;
 
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 0b0e289..66d3df6 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -30,6 +30,11 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   /// Used to initialized Align, so must precede it.
   bool isThumb;
 
+  /// hasThumb2 - True if the target architecture supports Thumb2. Do not use
+  /// to determine if function is compiled under Thumb mode, for that use
+  /// 'isThumb'.
+  bool hasThumb2;
+
   /// Align - required alignment.  ARM functions and Thumb functions with
   /// constant pools require 4-byte alignment; other Thumb functions
   /// require only 2-byte alignment.
@@ -91,7 +96,8 @@ class ARMFunctionInfo : public MachineFunctionInfo {
 
 public:
   ARMFunctionInfo() :
-    isThumb(false), 
+    isThumb(false),
+    hasThumb2(false),
     Align(2U),
     VarArgsRegSaveSize(0), HasStackFrame(false),
     LRSpilledForFarJump(false), R3IsLiveIn(false),
@@ -102,6 +108,7 @@ public:
 
   explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+    hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
     Align(isThumb ? 1U : 2U),
     VarArgsRegSaveSize(0), HasStackFrame(false),
     LRSpilledForFarJump(false), R3IsLiveIn(false),
@@ -112,6 +119,7 @@ public:
     JumpTableUId(0), ConstPoolEntryUId(0) {}
 
   bool isThumbFunction() const { return isThumb; }
+  bool isThumb2Function() const { return isThumb && hasThumb2; }
 
   unsigned getAlign() const { return Align; }
   void setAlign(unsigned a) { Align = a; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index bb0cc8f..f809f37 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -31,16 +31,9 @@
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
-#include <algorithm>
 using namespace llvm;
 
-static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging",
-                               cl::Hidden,
-                               cl::desc("Enable register scavenging on Thumb"));
-
-unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
   using namespace ARM;
   switch (RegEnum) {
   case R0:  case S0:  case D0:  return 0;
@@ -81,8 +74,8 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
   }
 }
 
-unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum,
-                                               bool &isSPVFP) {
+unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
+                                                   bool &isSPVFP) {
   isSPVFP = false;
 
   using namespace ARM;
@@ -108,12 +101,12 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum,
   case PC:  case D15: return 15;
 
   case S0: case S1: case S2: case S3:
-  case S4: case S5: case S6: case S7: 
-  case S8: case S9: case S10: case S11: 
-  case S12: case S13: case S14: case S15: 
-  case S16: case S17: case S18: case S19: 
-  case S20: case S21: case S22: case S23: 
-  case S24: case S25: case S26: case S27: 
+  case S4: case S5: case S6: case S7:
+  case S8: case S9: case S10: case S11:
+  case S12: case S13: case S14: case S15:
+  case S16: case S17: case S18: case S19:
+  case S20: case S21: case S22: case S23:
+  case S24: case S25: case S26: case S27:
   case S28: case S29: case S30: case S31:  {
     isSPVFP = true;
     switch (RegEnum) {
@@ -155,13 +148,18 @@ unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum,
   }
 }
 
-ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
-                                 const ARMSubtarget &sti)
+ARMBaseRegisterInfo::ARMBaseRegisterInfo(const TargetInstrInfo &tii,
+                                         const ARMSubtarget &sti)
   : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     TII(tii), STI(sti),
     FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
 }
 
+ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
+                                 const ARMSubtarget &sti)
+  : ARMBaseRegisterInfo(tii, sti) {
+}
+
 static inline
 const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) {
   return MIB.addImm((int64_t)ARMCC::AL).addReg(0);
@@ -176,55 +174,22 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
 /// specified immediate.
 void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI,
+                                        const TargetInstrInfo *TII, DebugLoc dl,
                                         unsigned DestReg, int Val,
-                                        unsigned Pred, unsigned PredReg,
-                                        const TargetInstrInfo *TII,
-                                        bool isThumb,
-                                        DebugLoc dl) const {
+                                        ARMCC::CondCodes Pred,
+                                        unsigned PredReg) const {
   MachineFunction &MF = *MBB.getParent();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   Constant *C = ConstantInt::get(Type::Int32Ty, Val);
   unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
-  if (isThumb)
-    BuildMI(MBB, MBBI, dl, 
-            TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx);
-  else
-    BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
-      .addConstantPoolIndex(Idx)
-      .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
-}
 
-/// isLowRegister - Returns true if the register is low register r0-r7.
-///
-bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
-  using namespace ARM;
-  switch (Reg) {
-  case R0:  case R1:  case R2:  case R3:
-  case R4:  case R5:  case R6:  case R7:
-    return true;
-  default:
-    return false;
-  }
-}
-
-const TargetRegisterClass*
-ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
-  if (STI.isThumb()) {
-    if (isLowRegister(Reg))
-      return ARM::tGPRRegisterClass;
-    switch (Reg) {
-    default:
-      break;
-    case ARM::R8:  case ARM::R9:  case ARM::R10:  case ARM::R11:
-    case ARM::R12: case ARM::SP:  case ARM::LR:   case ARM::PC:
-      return ARM::GPRRegisterClass;
-    }
-  }
-  return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+  BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg)
+    .addConstantPoolIndex(Idx)
+    .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
 }
 
 const unsigned*
-ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const unsigned CalleeSavedRegs[] = {
     ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
     ARM::R7, ARM::R6,  ARM::R5,  ARM::R4,
@@ -248,7 +213,7 @@ ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
 }
 
 const TargetRegisterClass* const *
-ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
   static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
     &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
     &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
@@ -297,7 +262,7 @@ ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
 }
 
-BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+BitVector ARMBaseRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // FIXME: avoid re-calculating this everytime.
   BitVector Reserved(getNumRegs());
   Reserved.set(ARM::SP);
@@ -311,7 +276,7 @@ BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 }
 
 bool
-ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
+ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
   switch (Reg) {
   default: break;
   case ARM::SP:
@@ -329,16 +294,16 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
   return false;
 }
 
-const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+const TargetRegisterClass *ARMBaseRegisterInfo::getPointerRegClass() const {
   return &ARM::GPRRegClass;
 }
 
 /// getAllocationOrder - Returns the register allocation order for a specified
 /// register class in the form of a pair of TargetRegisterClass iterators.
 std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
-                                    unsigned HintType, unsigned HintReg,
-                                    const MachineFunction &MF) const {
+ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+                                        unsigned HintType, unsigned HintReg,
+                                        const MachineFunction &MF) const {
   // Alternative register allocation orders when favoring even / odd registers
   // of register pairs.
 
@@ -479,8 +444,8 @@ ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
 /// ResolveRegAllocHint - Resolves the specified register allocation hint
 /// to a physical register. Returns the physical register if it is successful.
 unsigned
-ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
-                                     const MachineFunction &MF) const {
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                                         const MachineFunction &MF) const {
   if (Reg == 0 || !isPhysicalRegister(Reg))
     return 0;
   if (Type == 0)
@@ -495,8 +460,8 @@ ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
 }
 
 void
-ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
-                                    MachineFunction &MF) const {
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                                        MachineFunction &MF) const {
   MachineRegisterInfo *MRI = &MF.getRegInfo();
   std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
   if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
@@ -516,15 +481,14 @@ ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
 
 bool
 ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
-  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  return ThumbRegScavenging || !AFI->isThumbFunction();
+  return true;
 }
 
 /// hasFP - Return true if the specified function should have a dedicated frame
 /// pointer register.  This is true if the function has variable sized allocas
 /// or if frame pointer elimination is disabled.
 ///
-bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const {
+bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   return (NoFramePointerElim ||
           MFI->hasVarSizedObjects() ||
@@ -539,18 +503,13 @@ bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const {
 bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
   const MachineFrameInfo *FFI = MF.getFrameInfo();
   unsigned CFSize = FFI->getMaxCallFrameSize();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   // It's not always a good idea to include the call frame as part of the
   // stack frame. ARM (especially Thumb) has small immediate offset to
   // address the stack frame. So a large call frame can cause poor codegen
   // and may even makes it impossible to scavenge a register.
-  if (AFI->isThumbFunction()) {
-    if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
-      return false;
-  } else {
-    if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
-      return false;
-  }
+  if (CFSize >= ((1 << 12) - 1) / 2)  // Half of imm12
+    return false;
+
   return !MF.getFrameInfo()->hasVarSizedObjects();
 }
 
@@ -570,14 +529,14 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
     assert(ThisVal && "Didn't extract field correctly");
-    
+
     // We will handle these bits from offset, clear them.
     NumBytes &= ~ThisVal;
-    
+
     // Get the properly encoded SOImmVal field.
     int SOImmVal = ARM_AM::getSOImmVal(ThisVal);
     assert(SOImmVal != -1 && "Bit extraction didn't work?");
-    
+
     // Build the new ADD / SUB.
     BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg)
       .addReg(BaseReg, RegState::Kill).addImm(SOImmVal)
@@ -586,204 +545,13 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB,
   }
 }
 
-/// calcNumMI - Returns the number of instructions required to materialize
-/// the specific add / sub r, c instruction.
-static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
-                          unsigned NumBits, unsigned Scale) {
-  unsigned NumMIs = 0;
-  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
-
-  if (Opc == ARM::tADDrSPi) {
-    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
-    Bytes -= ThisVal;
-    NumMIs++;
-    NumBits = 8;
-    Scale = 1;  // Followed by a number of tADDi8.
-    Chunk = ((1 << NumBits) - 1) * Scale;
-  }
-
-  NumMIs += Bytes / Chunk;
-  if ((Bytes % Chunk) != 0)
-    NumMIs++;
-  if (ExtraOpc)
-    NumMIs++;
-  return NumMIs;
-}
-
-/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
-/// in a register using mov / mvn sequences or load the immediate from a
-/// constpool entry.
-static
-void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator &MBBI,
-                              unsigned DestReg, unsigned BaseReg,
-                              int NumBytes, bool CanChangeCC,
-                              const TargetInstrInfo &TII,
-                              const ARMRegisterInfo& MRI,
-                              DebugLoc dl) {
-    bool isHigh = !MRI.isLowRegister(DestReg) ||
-                  (BaseReg != 0 && !MRI.isLowRegister(BaseReg));
-    bool isSub = false;
-    // Subtract doesn't have high register version. Load the negative value
-    // if either base or dest register is a high register. Also, if do not
-    // issue sub as part of the sequence if condition register is to be
-    // preserved.
-    if (NumBytes < 0 && !isHigh && CanChangeCC) {
-      isSub = true;
-      NumBytes = -NumBytes;
-    }
-    unsigned LdReg = DestReg;
-    if (DestReg == ARM::SP) {
-      assert(BaseReg == ARM::SP && "Unexpected!");
-      LdReg = ARM::R3;
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
-        .addReg(ARM::R3, RegState::Kill);
-    }
-
-    if (NumBytes <= 255 && NumBytes >= 0)
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
-    else if (NumBytes < 0 && NumBytes >= -255) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
-        .addReg(LdReg, RegState::Kill);
-    } else
-      MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, 
-                            true, dl);
-
-    // Emit add / sub.
-    int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
-    const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, 
-                                            TII.get(Opc), DestReg);
-    if (DestReg == ARM::SP || isSub)
-      MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
-    else
-      MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
-    if (DestReg == ARM::SP)
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
-        .addReg(ARM::R12, RegState::Kill);
-}
-
-/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code.
-static
-void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator &MBBI,
-                               unsigned DestReg, unsigned BaseReg,
-                               int NumBytes, const TargetInstrInfo &TII,
-                               const ARMRegisterInfo& MRI,
-                               DebugLoc dl) {
-  bool isSub = NumBytes < 0;
-  unsigned Bytes = (unsigned)NumBytes;
-  if (isSub) Bytes = -NumBytes;
-  bool isMul4 = (Bytes & 3) == 0;
-  bool isTwoAddr = false;
-  bool DstNotEqBase = false;
-  unsigned NumBits = 1;
-  unsigned Scale = 1;
-  int Opc = 0;
-  int ExtraOpc = 0;
-
-  if (DestReg == BaseReg && BaseReg == ARM::SP) {
-    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
-    NumBits = 7;
-    Scale = 4;
-    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
-    isTwoAddr = true;
-  } else if (!isSub && BaseReg == ARM::SP) {
-    // r1 = add sp, 403
-    // =>
-    // r1 = add sp, 100 * 4
-    // r1 = add r1, 3
-    if (!isMul4) {
-      Bytes &= ~3;
-      ExtraOpc = ARM::tADDi3;
-    }
-    NumBits = 8;
-    Scale = 4;
-    Opc = ARM::tADDrSPi;
-  } else {
-    // sp = sub sp, c
-    // r1 = sub sp, c
-    // r8 = sub sp, c
-    if (DestReg != BaseReg)
-      DstNotEqBase = true;
-    NumBits = 8;
-    Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
-    isTwoAddr = true;
-  }
-
-  unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
-  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
-  if (NumMIs > Threshold) {
-    // This will expand into too many instructions. Load the immediate from a
-    // constpool entry.
-    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
-                             MRI, dl);
-    return;
-  }
-
-  if (DstNotEqBase) {
-    if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) {
-      // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
-      unsigned Chunk = (1 << 3) - 1;
-      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
-      Bytes -= ThisVal;
-      BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
-        .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
-    } else {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
-        .addReg(BaseReg, RegState::Kill);
-    }
-    BaseReg = DestReg;
-  }
-
-  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
-  while (Bytes) {
-    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
-    Bytes -= ThisVal;
-    ThisVal /= Scale;
-    // Build the new tADD / tSUB.
-    if (isTwoAddr)
-      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
-        .addReg(DestReg).addImm(ThisVal);
-    else {
-      bool isKill = BaseReg != ARM::SP;
-      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
-        .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
-      BaseReg = DestReg;
-
-      if (Opc == ARM::tADDrSPi) {
-        // r4 = add sp, imm
-        // r4 = add r4, imm
-        // ...
-        NumBits = 8;
-        Scale = 1;
-        Chunk = ((1 << NumBits) - 1) * Scale;
-        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
-        isTwoAddr = true;
-      }
-    }
-  }
-
-  if (ExtraOpc)
-    BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
-      .addReg(DestReg, RegState::Kill)
-      .addImm(((unsigned)NumBytes) & 3);
-}
-
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
-                  int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg,
-                  bool isThumb, const TargetInstrInfo &TII, 
-                  const ARMRegisterInfo& MRI,
-                  DebugLoc dl) {
-  if (isThumb)
-    emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
-                              MRI, dl);
-  else
-    emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
-                            Pred, PredReg, TII, dl);
+static void
+emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+             const TargetInstrInfo &TII, DebugLoc dl,
+             int NumBytes,
+             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+  emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes,
+                          Pred, PredReg, TII, dl);
 }
 
 void ARMRegisterInfo::
@@ -797,7 +565,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     DebugLoc dl = Old->getDebugLoc();
     unsigned Amount = Old->getOperand(0).getImm();
     if (Amount != 0) {
-      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
@@ -806,46 +573,22 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 
       // Replace the pseudo instruction with a new instruction...
       unsigned Opc = Old->getOpcode();
-      bool isThumb = AFI->isThumbFunction();
-      ARMCC::CondCodes Pred = isThumb
-        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm();
+      ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
       if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
         // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
-        unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg();
-        emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl);
+        unsigned PredReg = Old->getOperand(2).getReg();
+        emitSPUpdate(MBB, I, TII, dl, -Amount, Pred, PredReg);
       } else {
         // Note: PredReg is operand 3 for ADJCALLSTACKUP.
-        unsigned PredReg = isThumb ? 0 : Old->getOperand(3).getReg();
+        unsigned PredReg = Old->getOperand(3).getReg();
         assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
-        emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl);
+        emitSPUpdate(MBB, I, TII, dl, Amount, Pred, PredReg);
       }
     }
   }
   MBB.erase(I);
 }
 
-/// emitThumbConstant - Emit a series of instructions to materialize a
-/// constant.
-static void emitThumbConstant(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator &MBBI,
-                              unsigned DestReg, int Imm,
-                              const TargetInstrInfo &TII,
-                              const ARMRegisterInfo& MRI,
-                              DebugLoc dl) {
-  bool isSub = Imm < 0;
-  if (isSub) Imm = -Imm;
-
-  int Chunk = (1 << 8) - 1;
-  int ThisVal = (Imm > Chunk) ? Chunk : Imm;
-  Imm -= ThisVal;
-  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
-  if (Imm > 0) 
-    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
-  if (isSub)
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
-      .addReg(DestReg, RegState::Kill);
-}
-
 /// findScratchRegister - Find a 'free' ARM register. If register scavenger
 /// is not being used, R12 is available. Otherwise, try for a call-clobbered
 /// register first and then a spilled callee-saved register if that fails.
@@ -868,17 +611,16 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineBasicBlock &MBB = *MI.getParent();
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  bool isThumb = AFI->isThumbFunction();
   DebugLoc dl = MI.getDebugLoc();
 
   while (!MI.getOperand(i).isFI()) {
     ++i;
     assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
   }
-  
+
   unsigned FrameReg = ARM::SP;
   int FrameIndex = MI.getOperand(i).getIndex();
-  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + 
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
                MF.getFrameInfo()->getStackSize() + SPAdj;
 
   if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
@@ -926,86 +668,20 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       MI.getOperand(i+1).ChangeToImmediate(ImmedOffset);
       return;
     }
-    
+
     // Otherwise, we fallback to common code below to form the imm offset with
     // a sequence of ADDri instructions.  First though, pull as much of the imm
     // into this ADDri as possible.
     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
-    
+
     // We will handle these bits from offset, clear them.
     Offset &= ~ThisImmVal;
-    
+
     // Get the properly encoded SOImmVal field.
     int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal);
-    assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");    
+    assert(ThisSOImmVal != -1 && "Bit extraction didn't work?");
     MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal);
-  } else if (Opcode == ARM::tADDrSPi) {
-    Offset += MI.getOperand(i+1).getImm();
-
-    // Can't use tADDrSPi if it's based off the frame pointer.
-    unsigned NumBits = 0;
-    unsigned Scale = 1;
-    if (FrameReg != ARM::SP) {
-      Opcode = ARM::tADDi3;
-      MI.setDesc(TII.get(ARM::tADDi3));
-      NumBits = 3;
-    } else {
-      NumBits = 8;
-      Scale = 4;
-      assert((Offset & 3) == 0 &&
-             "Thumb add/sub sp, #imm immediate must be multiple of 4!");
-    }
-
-    if (Offset == 0) {
-      // Turn it into a move.
-      MI.setDesc(TII.get(ARM::tMOVhir2lor));
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.RemoveOperand(i+1);
-      return;
-    }
-
-    // Common case: small offset, fits into instruction.
-    unsigned Mask = (1 << NumBits) - 1;
-    if (((Offset / Scale) & ~Mask) == 0) {
-      // Replace the FrameIndex with sp / fp
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
-      return;
-    }
-
-    unsigned DestReg = MI.getOperand(0).getReg();
-    unsigned Bytes = (Offset > 0) ? Offset : -Offset;
-    unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
-    // MI would expand into a large number of instructions. Don't try to
-    // simplify the immediate.
-    if (NumMIs > 2) {
-      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
-                                *this, dl);
-      MBB.erase(II);
-      return;
-    }
-
-    if (Offset > 0) {
-      // Translate r0 = add sp, imm to
-      // r0 = add sp, 255*4
-      // r0 = add r0, (imm - 255*4)
-      MI.getOperand(i).ChangeToRegister(FrameReg, false);
-      MI.getOperand(i+1).ChangeToImmediate(Mask);
-      Offset = (Offset - Mask * Scale);
-      MachineBasicBlock::iterator NII = next(II);
-      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
-                                *this, dl);
-    } else {
-      // Translate r0 = add sp, -imm to
-      // r0 = -imm (this is then translated into a series of instructons)
-      // r0 = add r0, sp
-      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
-      MI.setDesc(TII.get(ARM::tADDhirr));
-      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
-      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
-    }
-    return;
   } else {
     unsigned ImmIdx = 0;
     int InstrOffs = 0;
@@ -1037,13 +713,6 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       Scale = 4;
       break;
     }
-    case ARMII::AddrModeTs: {
-      ImmIdx = i+1;
-      InstrOffs = MI.getOperand(ImmIdx).getImm();
-      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
-      Scale = 4;
-      break;
-    }
     default:
       assert(0 && "Unsupported addressing mode!");
       abort();
@@ -1052,7 +721,7 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
     Offset += InstrOffs * Scale;
     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
-    if (Offset < 0 && !isThumb) {
+    if (Offset < 0) {
       Offset = -Offset;
       isSub = true;
     }
@@ -1070,121 +739,34 @@ void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       return;
     }
 
-    bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
-    if (AddrMode == ARMII::AddrModeTs) {
-      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
-      // a different base register.
-      NumBits = 5;
-      Mask = (1 << NumBits) - 1;
-    }
-    // If this is a thumb spill / restore, we will be using a constpool load to
-    // materialize the offset.
-    if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore)
-      ImmOp.ChangeToImmediate(0);
-    else {
-      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
-      ImmedOffset = ImmedOffset & Mask;
-      if (isSub)
-        ImmedOffset |= 1 << NumBits;
-      ImmOp.ChangeToImmediate(ImmedOffset);
-      Offset &= ~(Mask*Scale);
-    }
+    // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+    ImmedOffset = ImmedOffset & Mask;
+    if (isSub)
+      ImmedOffset |= 1 << NumBits;
+    ImmOp.ChangeToImmediate(ImmedOffset);
+    Offset &= ~(Mask*Scale);
   }
-  
+
   // If we get here, the immediate doesn't fit into the instruction.  We folded
   // as much as possible above, handle the rest, providing a register that is
   // SP+LargeImm.
   assert(Offset && "This code isn't needed if offset already handled!");
 
-  if (isThumb) {
-    if (Desc.mayLoad()) {
-      // Use the destination register to materialize sp + offset.
-      unsigned TmpReg = MI.getOperand(0).getReg();
-      bool UseRR = false;
-      if (Opcode == ARM::tRestore) {
-        if (FrameReg == ARM::SP)
-          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                   Offset, false, TII, *this, dl);
-        else {
-          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
-                            true, dl);
-          UseRR = true;
-        }
-      } else
-        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                  *this, dl);
-      MI.setDesc(TII.get(ARM::tLDR));
-      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
-      if (UseRR)
-        // Use [reg, reg] addrmode.
-        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-      else  // tLDR has an extra register operand.
-        MI.addOperand(MachineOperand::CreateReg(0, false));
-    } else if (Desc.mayStore()) {
-      // FIXME! This is horrific!!! We need register scavenging.
-      // Our temporary workaround has marked r3 unavailable. Of course, r3 is
-      // also a ABI register so it's possible that is is the register that is
-      // being storing here. If that's the case, we do the following:
-      // r12 = r2
-      // Use r2 to materialize sp + offset
-      // str r3, r2
-      // r2 = r12
-      unsigned ValReg = MI.getOperand(0).getReg();
-      unsigned TmpReg = ARM::R3;
-      bool UseRR = false;
-      if (ValReg == ARM::R3) {
-        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
-          .addReg(ARM::R2, RegState::Kill);
-        TmpReg = ARM::R2;
-      }
-      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
-        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
-          .addReg(ARM::R3, RegState::Kill);
-      if (Opcode == ARM::tSpill) {
-        if (FrameReg == ARM::SP)
-          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                   Offset, false, TII, *this, dl);
-        else {
-          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
-                            true, dl);
-          UseRR = true;
-        }
-      } else
-        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                  *this, dl);
-      MI.setDesc(TII.get(ARM::tSTR));
-      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
-      if (UseRR)  // Use [reg, reg] addrmode.
-        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
-      else // tSTR has an extra register operand.
-        MI.addOperand(MachineOperand::CreateReg(0, false));
-
-      MachineBasicBlock::iterator NII = next(II);
-      if (ValReg == ARM::R3)
-        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
-          .addReg(ARM::R12, RegState::Kill);
-      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
-        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
-          .addReg(ARM::R12, RegState::Kill);
-    } else
-      assert(false && "Unexpected opcode!");
-  } else {
-    // Insert a set of r12 with the full address: r12 = sp + offset
-    // If the offset we have is too large to fit into the instruction, we need
-    // to form it with a series of ADDri's.  Do this by taking 8-bit chunks
-    // out of 'Offset'.
-    unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
-    if (ScratchReg == 0)
-      // No register is "free". Scavenge a register.
-      ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
-    int PIdx = MI.findFirstPredOperandIdx();
-    ARMCC::CondCodes Pred = (PIdx == -1)
-      ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
-    unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
-    emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
-                            isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
-    MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
-  }
+  // Insert a set of r12 with the full address: r12 = sp + offset
+  // If the offset we have is too large to fit into the instruction, we need
+  // to form it with a series of ADDri's.  Do this by taking 8-bit chunks
+  // out of 'Offset'.
+  unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+  if (ScratchReg == 0)
+    // No register is "free". Scavenge a register.
+    ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+  int PIdx = MI.findFirstPredOperandIdx();
+  ARMCC::CondCodes Pred = (PIdx == -1)
+    ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+  unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+  emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+                          isSub ? -Offset : Offset, Pred, PredReg, TII, dl);
+  MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
 }
 
 static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
@@ -1206,8 +788,8 @@ static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
 }
 
 void
-ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                      RegScavenger *RS) const {
+ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                                          RegScavenger *RS) const {
   // This tells PEI to spill the FP as if it is any other callee-save register
   // to take advantage the eliminateFrameIndex machinery. This also ensures it
   // is spilled in the order specified by getCalleeSavedRegs() to make it easier
@@ -1266,7 +848,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
         default:
           break;
         }
-      } else { 
+      } else {
         if (!STI.isTargetDarwin()) {
           UnspilledCS1GPRs.push_back(Reg);
           continue;
@@ -1332,7 +914,8 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
         for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
           unsigned Reg = UnspilledCS1GPRs[i];
           // Don't spiil high register if the function is thumb
-          if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+          if (!AFI->isThumbFunction() ||
+              isARMLowRegister(Reg) || Reg == ARM::LR) {
             MF.getRegInfo().setPhysRegUsed(Reg);
             AFI->setCSRegisterIsSpilled(Reg);
             if (!isReservedReg(MF, Reg))
@@ -1351,7 +934,7 @@ ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     }
 
     // Estimate if we might need to scavenge a register at some point in order
-    // to materialize a stack offset. If so, either spill one additiona
+    // to materialize a stack offset. If so, either spill one additional
     // callee-saved register or reserve a special spill slot to facilitate
     // register scavenging.
     if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
@@ -1460,40 +1043,23 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo  *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  bool isThumb = AFI->isThumbFunction();
   unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   DebugLoc dl = (MBBI != MBB.end() ?
                  MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
 
-  if (isThumb) {
-    // Check if R3 is live in. It might have to be used as a scratch register.
-    for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
-         E = MF.getRegInfo().livein_end(); I != E; ++I) {
-      if (I->first == ARM::R3) {
-        AFI->setR3IsLiveIn(true);
-        break;
-      }
-    }
-
-    // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
-    NumBytes = (NumBytes + 3) & ~3;
-    MFI->setStackSize(NumBytes);
-  }
-
   // Determine the sizes of each callee-save spill areas and record which frame
   // belongs to which callee-save spill areas.
   unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
   int FramePtrSpillFI = 0;
 
   if (VARegSaveSize)
-    emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
-                 *this, dl);
+    emitSPUpdate(MBB, MBBI, TII, dl, -VARegSaveSize);
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+      emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes);
     return;
   }
 
@@ -1531,34 +1097,25 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
   }
 
-  if (!isThumb) {
-    // Build the new SUBri to adjust SP for integer callee-save spill area 1.
-    emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl);
-    movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
-  } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
-    ++MBBI;
-    if (MBBI != MBB.end())
-      dl = MBBI->getDebugLoc();
-  }
+  // Build the new SUBri to adjust SP for integer callee-save spill area 1.
+  emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS1Size);
+  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI);
 
   // Darwin ABI requires FP to point to the stack slot that contains the
   // previous FP.
   if (STI.isTargetDarwin() || hasFP(MF)) {
     MachineInstrBuilder MIB =
-      BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), 
-              FramePtr)
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::ADDri), FramePtr)
       .addFrameIndex(FramePtrSpillFI).addImm(0);
-    if (!isThumb) AddDefaultCC(AddDefaultPred(MIB));
+    AddDefaultCC(AddDefaultPred(MIB));
   }
 
-  if (!isThumb) {
-    // Build the new SUBri to adjust SP for integer callee-save spill area 2.
-    emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl);
+  // Build the new SUBri to adjust SP for integer callee-save spill area 2.
+  emitSPUpdate(MBB, MBBI, TII, dl, -GPRCS2Size);
 
-    // Build the new SUBri to adjust SP for FP callee-save spill area.
-    movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
-    emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl);
-  }
+  // Build the new SUBri to adjust SP for FP callee-save spill area.
+  movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI);
+  emitSPUpdate(MBB, MBBI, TII, dl, -DPRCSSize);
 
   // Determine starting offsets of spill areas.
   unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
@@ -1568,16 +1125,15 @@ void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
   AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
   AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
   AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
-  
+
   NumBytes = DPRCSOffset;
   if (NumBytes) {
     // Insert it after all the callee-save spills.
-    if (!isThumb)
-      movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
-    emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI);
+    emitSPUpdate(MBB, MBBI, TII, dl, -NumBytes);
   }
 
-  if(STI.isTargetELF() && hasFP(MF)) {
+  if (STI.isTargetELF() && hasFP(MF)) {
     MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
                              AFI->getFramePtrSpillOffset());
   }
@@ -1596,8 +1152,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
 
 static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
   return ((MI->getOpcode() == ARM::FLDD ||
-           MI->getOpcode() == ARM::LDR  ||
-           MI->getOpcode() == ARM::tRestore) &&
+           MI->getOpcode() == ARM::LDR) &&
           MI->getOperand(1).isFI() &&
           isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
 }
@@ -1605,20 +1160,17 @@ static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
 void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  assert((MBBI->getOpcode() == ARM::BX_RET ||
-          MBBI->getOpcode() == ARM::tBX_RET ||
-          MBBI->getOpcode() == ARM::tPOP_RET) &&
+  assert(MBBI->getOpcode() == ARM::BX_RET &&
          "Can only insert epilog into returning blocks");
   DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  bool isThumb = AFI->isThumbFunction();
   unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
   int NumBytes = (int)MFI->getStackSize();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes != 0)
-      emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl);
+      emitSPUpdate(MBB, MBBI, TII, dl, NumBytes);
   } else {
     // Unwind MBBI to point to first LDR / FLDD.
     const unsigned *CSRegs = getCalleeSavedRegs();
@@ -1634,110 +1186,73 @@ void ARMRegisterInfo::emitEpilogue(MachineFunction &MF,
     NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
                  AFI->getGPRCalleeSavedArea2Size() +
                  AFI->getDPRCalleeSavedAreaSize());
-    if (isThumb) {
-      if (hasFP(MF)) {
-        NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
-        // Reset SP based on frame pointer only if the stack frame extends beyond
-        // frame pointer stack slot or target is ELF and the function has FP.
+
+    // Darwin ABI requires FP to point to the stack slot that contains the
+    // previous FP.
+    if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on frame pointer only if the stack frame extends beyond
+      // frame pointer stack slot or target is ELF and the function has FP.
+      if (AFI->getGPRCalleeSavedArea2Size() ||
+          AFI->getDPRCalleeSavedAreaSize()  ||
+          AFI->getDPRCalleeSavedAreaOffset()||
+          hasFP(MF)) {
         if (NumBytes)
-          emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
-                                    TII, *this, dl);
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
+            .addImm(NumBytes)
+            .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
         else
-          BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
-            .addReg(FramePtr);
-      } else {
-        if (MBBI->getOpcode() == ARM::tBX_RET &&
-            &MBB.front() != MBBI &&
-            prior(MBBI)->getOpcode() == ARM::tPOP) {
-          MachineBasicBlock::iterator PMBBI = prior(MBBI);
-          emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
-                       *this, dl);
-        } else
-          emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII,
-                       *this, dl);
-      }
-    } else {
-      // Darwin ABI requires FP to point to the stack slot that contains the
-      // previous FP.
-      if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) {
-        NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
-        // Reset SP based on frame pointer only if the stack frame extends beyond
-        // frame pointer stack slot or target is ELF and the function has FP.
-        if (AFI->getGPRCalleeSavedArea2Size() ||
-            AFI->getDPRCalleeSavedAreaSize()  ||
-            AFI->getDPRCalleeSavedAreaOffset()||
-            hasFP(MF)) {
-          if (NumBytes)
-            BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr)
-              .addImm(NumBytes)
-              .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
-          else
-            BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
-              .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
-        }
-      } else if (NumBytes) {
-        emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl);
+          BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr)
+            .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
       }
+    } else if (NumBytes) {
+      emitSPUpdate(MBB, MBBI, TII, dl, NumBytes);
+    }
 
-      // Move SP to start of integer callee save spill area 2.
-      movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
-      emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0,
-                   false, TII, *this, dl);
+    // Move SP to start of integer callee save spill area 2.
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI);
+    emitSPUpdate(MBB, MBBI, TII, dl, AFI->getDPRCalleeSavedAreaSize());
 
-      // Move SP to start of integer callee save spill area 1.
-      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
-      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0,
-                   false, TII, *this, dl);
+    // Move SP to start of integer callee save spill area 1.
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI);
+    emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea2Size());
 
-      // Move SP to SP upon entry to the function.
-      movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
-      emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0,
-                   false, TII, *this, dl);
-    }
+    // Move SP to SP upon entry to the function.
+    movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI);
+    emitSPUpdate(MBB, MBBI, TII, dl, AFI->getGPRCalleeSavedArea1Size());
   }
 
-  if (VARegSaveSize) {
-    if (isThumb)
-      // Epilogue for vararg functions: pop LR to R3 and branch off it.
-      // FIXME: Verify this is still ok when R3 is no longer being reserved.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
-
-    emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII,
-                 *this, dl);
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, VARegSaveSize);
 
-    if (isThumb) {
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
-      MBB.erase(MBBI);
-    }
-  }
 }
 
-unsigned ARMRegisterInfo::getRARegister() const {
+unsigned ARMBaseRegisterInfo::getRARegister() const {
   return ARM::LR;
 }
 
-unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
   if (STI.isTargetDarwin() || hasFP(MF))
     return FramePtr;
   return ARM::SP;
 }
 
-unsigned ARMRegisterInfo::getEHExceptionRegister() const {
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
   assert(0 && "What is the exception register");
   return 0;
 }
 
-unsigned ARMRegisterInfo::getEHHandlerRegister() const {
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
   assert(0 && "What is the exception handler register");
   return 0;
 }
 
-int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
 }
 
-unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg,
-                                              const MachineFunction &MF) const {
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+                                               const MachineFunction &MF) const {
   switch (Reg) {
   default: break;
   // Return 0 if either register of the pair is a special register.
@@ -1810,7 +1325,7 @@ unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg,
   return 0;
 }
 
-unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg,
+unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
                                              const MachineFunction &MF) const {
   switch (Reg) {
   default: break;
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e8f4fd8..7fe075a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -14,6 +14,7 @@
 #ifndef ARMREGISTERINFO_H
 #define ARMREGISTERINFO_H
 
+#include "ARM.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "ARMGenRegisterInfo.h.inc"
 
@@ -30,21 +31,28 @@ namespace ARMRI {
   };
 }
 
-struct ARMRegisterInfo : public ARMGenRegisterInfo {
+/// isARMLowRegister - Returns true if the register is low register r0-r7.
+///
+static inline bool isARMLowRegister(unsigned Reg) {
+  using namespace ARM;
+  switch (Reg) {
+  case R0:  case R1:  case R2:  case R3:
+  case R4:  case R5:  case R6:  case R7:
+    return true;
+  default:
+    return false;
+  }
+}
+
+struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+protected:
   const TargetInstrInfo &TII;
   const ARMSubtarget &STI;
 
+  /// FramePtr - ARM physical register used as frame ptr.
+  unsigned FramePtr;
 public:
-  ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
-
-  /// emitLoadConstPool - Emits a load from constpool to materialize the
-  /// specified immediate.
-  void emitLoadConstPool(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator &MBBI,
-                         unsigned DestReg, int Val,
-                         unsigned Pred, unsigned PredReg,
-                         const TargetInstrInfo *TII, bool isThumb,
-                         DebugLoc dl) const;
+  ARMBaseRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
 
   /// getRegisterNumbering - Given the enum value for some register, e.g.
   /// ARM::LR, return the number that it corresponds to (e.g. 14).
@@ -55,8 +63,6 @@ public:
   static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
 
   /// Code Generation virtual methods...
-  const TargetRegisterClass *
-    getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   const TargetRegisterClass* const*
@@ -79,25 +85,11 @@ public:
   void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
                           MachineFunction &MF) const;
 
-  bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
   bool hasFP(const MachineFunction &MF) const;
 
-  bool hasReservedCallFrame(MachineFunction &MF) const;
-
-  void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I) const;
-
-  void eliminateFrameIndex(MachineBasicBlock::iterator II,
-                           int SPAdj, RegScavenger *RS = NULL) const;
-
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(MachineFunction &MF) const;
@@ -107,17 +99,44 @@ public:
   unsigned getEHHandlerRegister() const;
 
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
-  
+
   bool isLowRegister(unsigned Reg) const;
 
 private:
-  /// FramePtr - ARM physical register used as frame ptr.
-  unsigned FramePtr;
-
   unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
 
   unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+public:
+  ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  void emitLoadConstPool(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo *TII, DebugLoc dl,
+                         unsigned DestReg, int Val,
+                         ARMCC::CondCodes Pred = ARMCC::AL,
+                         unsigned PredReg = 0) const;
+
+  /// Code Generation virtual methods...
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
 
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index c3cc7ff..5110b31 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -108,7 +108,8 @@ protected:
 
   bool isThumb() const { return IsThumb; }
   bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
-  bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+  bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+  bool hasThumb2() const { return ThumbMode >= Thumb2; }
 
   bool isR9Reserved() const { return IsR9Reserved; }
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index f7b8215..2344733 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -95,13 +95,18 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
 }
 
 ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
-  : ARMBaseTargetMachine(M, FS, true), InstrInfo(Subtarget),
+  : ARMBaseTargetMachine(M, FS, true),
     DataLayout(Subtarget.isAPCS_ABI() ?
                std::string("e-p:32:32-f64:32:32-i64:32:32-"
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
                std::string("e-p:32:32-f64:64:64-i64:64:64-"
                            "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
     TLInfo(*this) {
+  // Create the approriate type of Thumb InstrInfo
+  if (Subtarget.hasThumb2())
+    InstrInfo = new Thumb2InstrInfo(Subtarget);
+  else
+    InstrInfo = new Thumb1InstrInfo(Subtarget);
 }
 
 unsigned ARMTargetMachine::getJITMatchQuality() {
@@ -179,7 +184,7 @@ bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
 
   return false;
 }
@@ -198,7 +203,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -217,7 +222,7 @@ bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -232,7 +237,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -247,7 +252,7 @@ bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 0b49b92..a0df54d 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -22,7 +22,8 @@
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
-#include "ThumbInstrInfo.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
 
 namespace llvm {
 
@@ -43,7 +44,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             ARMBaseTargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
@@ -112,23 +112,27 @@ public:
 };
 
 /// ThumbTargetMachine - Thumb target machine.
+/// Due to the way architectures are handled, this represents both
+///   Thumb-1 and Thumb-2.
 ///
 class ThumbTargetMachine : public ARMBaseTargetMachine {
-  ThumbInstrInfo      InstrInfo;
-  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMBaseInstrInfo    *InstrInfo;   // either Thumb1InstrInfo or Thumb2InstrInfo
+  const TargetData    DataLayout;   // Calculates type size & alignment
   ARMTargetLowering   TLInfo;
 public:
   ThumbTargetMachine(const Module &M, const std::string &FS);
 
-  virtual const ARMRegisterInfo  *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
+  /// returns either Thumb1RegisterInfo of Thumb2RegisterInfo
+  virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo->getRegisterInfo();
   }
 
-  virtual       ARMTargetLowering *getTargetLowering() const {
+  virtual ARMTargetLowering *getTargetLowering() const {
     return const_cast<ARMTargetLowering*>(&TLInfo);
   }
 
-  virtual const ThumbInstrInfo   *getInstrInfo() const { return &InstrInfo; }
+  /// returns either Thumb1InstrInfo or Thumb2InstrInfo
+  virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
 
   static unsigned getJITMatchQuality();
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 400f628a..434a19a 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -82,9 +82,8 @@ namespace {
     bool InCPMode;
   public:
     explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
-                           const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                           bool V)
-      : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL),
+                           const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V), DW(0), AFI(NULL), MCP(NULL),
         InCPMode(false) {
       Subtarget = &TM.getSubtarget<ARMSubtarget>();
     }
@@ -93,45 +92,50 @@ namespace {
       return "ARM Assembly Printer";
     }
 
-    void printOperand(const MachineInstr *MI, int opNum,
+    void printOperand(const MachineInstr *MI, int OpNum,
                       const char *Modifier = 0);
-    void printSOImmOperand(const MachineInstr *MI, int opNum);
-    void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
-    void printSORegOperand(const MachineInstr *MI, int opNum);
-    void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
-    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
-    void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
-    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo);
-    void printAddrMode4Operand(const MachineInstr *MI, int OpNo,
+    void printSOImmOperand(const MachineInstr *MI, int OpNum);
+    void printSOImm2PartOperand(const MachineInstr *MI, int OpNum);
+    void printSORegOperand(const MachineInstr *MI, int OpNum);
+    void printAddrMode2Operand(const MachineInstr *MI, int OpNum);
+    void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNum);
+    void printAddrMode3Operand(const MachineInstr *MI, int OpNum);
+    void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNum);
+    void printAddrMode4Operand(const MachineInstr *MI, int OpNum,
                                const char *Modifier = 0);
-    void printAddrMode5Operand(const MachineInstr *MI, int OpNo,
+    void printAddrMode5Operand(const MachineInstr *MI, int OpNum,
                                const char *Modifier = 0);
-    void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
+    void printAddrMode6Operand(const MachineInstr *MI, int OpNum);
+    void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
                                 const char *Modifier = 0);
-    void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo);
+    void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum);
 
-    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
-    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
+    void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum);
+    void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum,
                                       unsigned Scale);
-    void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo);
-    void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
-    void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
-    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+    void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNum);
+    void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNum);
+    void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNum);
+    void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNum);
 
-    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
+    void printT2SOImmOperand(const MachineInstr *MI, int OpNum);
     void printT2SOOperand(const MachineInstr *MI, int OpNum);
-
-    void printPredicateOperand(const MachineInstr *MI, int opNum);
-    void printSBitModifierOperand(const MachineInstr *MI, int opNum);
-    void printPCLabel(const MachineInstr *MI, int opNum);
-    void printRegisterList(const MachineInstr *MI, int opNum);
-    void printCPInstOperand(const MachineInstr *MI, int opNum,
+    void printT2AddrModeImm12Operand(const MachineInstr *MI, int OpNum);
+    void printT2AddrModeImm8Operand(const MachineInstr *MI, int OpNum);
+    void printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, int OpNum);
+    void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum);
+
+    void printPredicateOperand(const MachineInstr *MI, int OpNum);
+    void printSBitModifierOperand(const MachineInstr *MI, int OpNum);
+    void printPCLabel(const MachineInstr *MI, int OpNum);
+    void printRegisterList(const MachineInstr *MI, int OpNum);
+    void printCPInstOperand(const MachineInstr *MI, int OpNum,
                             const char *Modifier);
-    void printJTBlockOperand(const MachineInstr *MI, int opNum);
+    void printJTBlockOperand(const MachineInstr *MI, int OpNum);
 
-    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                  unsigned AsmVariant, const char *ExtraCode);
-    virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+    virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
                                        unsigned AsmVariant,
                                        const char *ExtraCode);
 
@@ -232,15 +236,16 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   printVisibility(CurrentFnName, F->getVisibility());
 
   if (AFI->isThumbFunction()) {
-    EmitAlignment(1, F, AFI->getAlign());
+    EmitAlignment(MF.getAlignment(), F, AFI->getAlign());
     O << "\t.code\t16\n";
     O << "\t.thumb_func";
     if (Subtarget->isTargetDarwin())
       O << "\t" << CurrentFnName;
     O << "\n";
     InCPMode = false;
-  } else
-    EmitAlignment(2, F);
+  } else {
+    EmitAlignment(MF.getAlignment(), F);
+  }
 
   O << CurrentFnName << ":\n";
   // Emit pre-function debug information.
@@ -282,9 +287,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
+void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
                                  const char *Modifier) {
-  const MachineOperand &MO = MI->getOperand(opNum);
+  const MachineOperand &MO = MI->getOperand(OpNum);
   switch (MO.getType()) {
   case MachineOperand::MO_Register: {
     unsigned Reg = MO.getReg();
@@ -584,6 +589,22 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
   O << "]";
 }
 
+void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
+  const MachineOperand &MO1 = MI->getOperand(Op);
+  const MachineOperand &MO2 = MI->getOperand(Op+1);
+  const MachineOperand &MO3 = MI->getOperand(Op+2);
+
+  // FIXME: No support yet for specifying alignment.
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]";
+
+  if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
+    if (MO2.getReg() == 0)
+      O << "!";
+    else
+      O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+  }
+}
+
 void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op,
                                            const char *Modifier) {
   if (Modifier && strcmp(Modifier, "label") == 0) {
@@ -606,6 +627,8 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
   O << "#" << lsb << ", #" << width;
 }
 
+//===--------------------------------------------------------------------===//
+
 void
 ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
@@ -659,6 +682,8 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
   O << "]";
 }
 
+//===--------------------------------------------------------------------===//
+
 /// printT2SOImmOperand - T2SOImm is:
 ///  1. a 4-bit splat control value and 8 bit immediate value
 ///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
@@ -694,47 +719,110 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) {
   O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
 }
 
+void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
+                                                int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+  unsigned OffImm = MO2.getImm();
+  if (OffImm)  // Don't print +0.
+    O << ", #+" << OffImm;
+  O << "]";
+}
 
-void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
-  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm();
+void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI,
+                                               int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+  int32_t OffImm = (int32_t)MO2.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << ", #-" << -OffImm;
+  else if (OffImm > 0)
+    O << ", #+" << OffImm;
+  O << "]";
+}
+
+void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI,
+                                                     int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  int32_t OffImm = (int32_t)MO1.getImm();
+  // Don't print +0.
+  if (OffImm < 0)
+    O << "#-" << -OffImm;
+  else if (OffImm > 0)
+    O << "#+" << OffImm;
+}
+
+void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI,
+                                                int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+  const MachineOperand &MO3 = MI->getOperand(OpNum+2);
+
+  O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName;
+
+  if (MO2.getReg()) {
+    O << ", +"
+      << TM.getRegisterInfo()->get(MO2.getReg()).AsmName;
+
+    unsigned ShAmt = MO3.getImm();
+    if (ShAmt) {
+      assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
+      O << ", lsl #" << ShAmt;
+    }
+  }
+  O << "]";
+}
+
+
+//===--------------------------------------------------------------------===//
+
+void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) {
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
   if (CC != ARMCC::AL)
     O << ARMCondCodeToString(CC);
 }
 
-void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){
-  unsigned Reg = MI->getOperand(opNum).getReg();
+void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
+  unsigned Reg = MI->getOperand(OpNum).getReg();
   if (Reg) {
     assert(Reg == ARM::CPSR && "Expect ARM CPSR register!");
     O << 's';
   }
 }
 
-void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) {
-  int Id = (int)MI->getOperand(opNum).getImm();
+void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
+  int Id = (int)MI->getOperand(OpNum).getImm();
   O << TAI->getPrivateGlobalPrefix() << "PC" << Id;
 }
 
-void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) {
+void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
   O << "{";
-  for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) {
+  for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
     printOperand(MI, i);
     if (i != e-1) O << ", ";
   }
   O << "}";
 }
 
-void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
+void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNum,
                                        const char *Modifier) {
   assert(Modifier && "This operand only works with a modifier!");
   // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the
   // data itself.
   if (!strcmp(Modifier, "label")) {
-    unsigned ID = MI->getOperand(OpNo).getImm();
+    unsigned ID = MI->getOperand(OpNum).getImm();
     O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
       << '_' << ID << ":\n";
   } else {
     assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE");
-    unsigned CPI = MI->getOperand(OpNo).getIndex();
+    unsigned CPI = MI->getOperand(OpNum).getIndex();
 
     const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
     
@@ -746,9 +834,9 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
   }
 }
 
-void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
-  const MachineOperand &MO1 = MI->getOperand(OpNo);
-  const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id
+void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
   unsigned JTI = MO1.getIndex();
   O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
     << '_' << JTI << '_' << MO2.getImm() << ":\n";
@@ -787,7 +875,7 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) {
 }
 
 
-bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                     unsigned AsmVariant, const char *ExtraCode){
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
@@ -797,10 +885,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     default: return true;  // Unknown modifier.
     case 'a': // Don't print "#" before a global var name or constant.
     case 'c': // Don't print "$" before a global var name or constant.
-      printOperand(MI, OpNo, "no_hash");
+      printOperand(MI, OpNum, "no_hash");
       return false;
     case 'P': // Print a VFP double precision register.
-      printOperand(MI, OpNo);
+      printOperand(MI, OpNum);
       return false;
     case 'Q':
       if (TM.getTargetData()->isLittleEndian())
@@ -812,24 +900,24 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       // Fallthrough
     case 'H': // Write second word of DI / DF reference.  
       // Verify that this operand has two consecutive registers.
-      if (!MI->getOperand(OpNo).isReg() ||
-          OpNo+1 == MI->getNumOperands() ||
-          !MI->getOperand(OpNo+1).isReg())
+      if (!MI->getOperand(OpNum).isReg() ||
+          OpNum+1 == MI->getNumOperands() ||
+          !MI->getOperand(OpNum+1).isReg())
         return true;
-      ++OpNo;   // Return the high-part.
+      ++OpNum;   // Return the high-part.
     }
   }
   
-  printOperand(MI, OpNo);
+  printOperand(MI, OpNum);
   return false;
 }
 
 bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                          unsigned OpNo, unsigned AsmVariant,
+                                          unsigned OpNum, unsigned AsmVariant,
                                           const char *ExtraCode) {
   if (ExtraCode && ExtraCode[0])
     return true; // Unknown modifier.
-  printAddrMode2Operand(MI, OpNo);
+  printAddrMode2Operand(MI, OpNum);
   return false;
 }
 
@@ -1138,9 +1226,8 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
 ///
 FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
                                              ARMBaseTargetMachine &tm,
-                                             CodeGenOpt::Level OptLevel,
                                              bool verbose) {
-  return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 namespace {
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index e665ed9..9c46fe0 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -24,5 +24,10 @@ add_llvm_target(ARMCodeGen
   ARMSubtarget.cpp
   ARMTargetAsmInfo.cpp
   ARMTargetMachine.cpp
-  ThumbInstrInfo.cpp
+  Thumb1InstrInfo.cpp
+  Thumb1RegisterInfo.cpp
+  Thumb2InstrInfo.cpp
+  Thumb2RegisterInfo.cpp
   )
+
+target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 4223699..f3377f9 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -530,3 +530,10 @@ those operations and the ARMv6 scalar versions.
 
 ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
 ARMInstrInfo::commuteInstruction() to support it.
+
+//===---------------------------------------------------------------------===//
+
+Split out LDR (literal) from normal ARM LDR instruction. Also consider spliting
+LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
+ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
+while ARMConstantIslandPass only need to worry about LDR (literal).
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
new file mode 100644
index 0000000..e13a811
--- /dev/null
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -0,0 +1,304 @@
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "Thumb1InstrInfo.h"
+
+using namespace llvm;
+
+Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI), RI(*this, STI) {
+}
+
+bool Thumb1InstrInfo::isMoveInstr(const MachineInstr &MI,
+                                  unsigned &SrcReg, unsigned &DstReg,
+                                  unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  unsigned oc = MI.getOpcode();
+  switch (oc) {
+  default:
+    return false;
+  case ARM::tMOVr:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2hir:
+    assert(MI.getDesc().getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "Invalid Thumb MOV instruction");
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+}
+
+unsigned Thumb1InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::tRestore:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+unsigned Thumb1InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case ARM::tSpill:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   const TargetRegisterClass *DestRC,
+                                   const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  if (DestRC == ARM::GPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+      return true;
+    }
+  } else if (DestRC == ARM::tGPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool Thumb1InstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+                     const SmallVectorImpl<unsigned> &Ops) const {
+  if (Ops.size() != 1) return false;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default: break;
+  case ARM::tMOVr:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVhir2hir: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        return false;
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        return false;
+    }
+    return true;
+  }
+  }
+
+  return false;
+}
+
+void Thumb1InstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tSpill))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void Thumb1InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                     bool isKill,
+                                     SmallVectorImpl<MachineOperand> &Addr,
+                                     const TargetRegisterClass *RC,
+                                     SmallVectorImpl<MachineInstr*> &NewMIs) const{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
+  if (RC == ARM::GPRRegisterClass) {
+    Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
+  }
+
+  MachineInstrBuilder MIB =
+    BuildMI(MF, DL,  get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
+
+void Thumb1InstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void Thumb1InstrInfo::
+loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                SmallVectorImpl<MachineOperand> &Addr,
+                const TargetRegisterClass *RC,
+                SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  if (RC == ARM::GPRRegisterClass) {
+    Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
+  }
+
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
+
+bool Thumb1InstrInfo::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    MIB.addReg(Reg, RegState::Kill);
+  }
+  return true;
+}
+
+bool Thumb1InstrInfo::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (CSI.empty())
+    return false;
+
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {
+      // Special epilogue for vararg functions. See emitEpilogue
+      if (isVarArg)
+        continue;
+      Reg = ARM::PC;
+      PopMI->setDesc(get(ARM::tPOP_RET));
+      MI = MBB.erase(MI);
+    }
+    PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+  }
+
+  // It's illegal to emit pop instruction without operands.
+  if (PopMI->getNumOperands() > 0)
+    MBB.insert(MI, PopMI);
+
+  return true;
+}
+
+MachineInstr *Thumb1InstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+                      const SmallVectorImpl<unsigned> &Ops, int FI) const {
+  if (Ops.size() != 1) return NULL;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = NULL;
+  switch (Opc) {
+  default: break;
+  case ARM::tMOVr:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVhir2hir: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      bool isKill = MI->getOperand(1).isKill();
+      if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        break;
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addFrameIndex(FI).addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        break;
+      bool isDead = MI->getOperand(0).isDead();
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addFrameIndex(FI).addImm(0);
+    }
+    break;
+  }
+  }
+
+  return NewMI;
+}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
new file mode 100644
index 0000000..1bfa1d0
--- /dev/null
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -0,0 +1,93 @@
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1INSTRUCTIONINFO_H
+#define THUMB1INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class Thumb1InstrInfo : public ARMBaseInstrInfo {
+  Thumb1RegisterInfo RI;
+public:
+  explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  const Thumb1RegisterInfo &getRegisterInfo() const { return RI; }
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI) const;
+
+  bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+  unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      int FrameIndex) const;
+ 
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      MachineInstr* LoadMI) const {
+    return 0;
+  }
+};
+}
+
+#endif // THUMB1INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
new file mode 100644
index 0000000..92f01d1
--- /dev/null
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -0,0 +1,755 @@
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb1RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+ThumbRegScavenging("enable-thumb-reg-scavenging",
+                   cl::Hidden,
+                   cl::desc("Enable register scavenging on Thumb"));
+
+Thumb1RegisterInfo::Thumb1RegisterInfo(const TargetInstrInfo &tii,
+                                       const ARMSubtarget &sti)
+  : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           unsigned DestReg, int Val,
+                                           const TargetInstrInfo *TII,
+                                           DebugLoc dl) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg)
+    .addConstantPoolIndex(Idx);
+}
+
+const TargetRegisterClass*
+Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+  if (isARMLowRegister(Reg))
+    return ARM::tGPRRegisterClass;
+  switch (Reg) {
+   default:
+    break;
+   case ARM::R8:  case ARM::R9:  case ARM::R10:  case ARM::R11:
+   case ARM::R12: case ARM::SP:  case ARM::LR:   case ARM::PC:
+    return ARM::GPRRegisterClass;
+  }
+
+  return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+}
+
+bool
+Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+  return ThumbRegScavenging;
+}
+
+bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has small immediate offset to
+  // address the stack frame. So a large call frame can cause poor codegen
+  // and may even makes it impossible to scavenge a register.
+  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, unsigned BaseReg,
+                              int NumBytes, bool CanChangeCC,
+                              const TargetInstrInfo &TII,
+                              const Thumb1RegisterInfo& MRI,
+                              DebugLoc dl) {
+    bool isHigh = !isARMLowRegister(DestReg) ||
+                  (BaseReg != 0 && !isARMLowRegister(BaseReg));
+    bool isSub = false;
+    // Subtract doesn't have high register version. Load the negative value
+    // if either base or dest register is a high register. Also, if do not
+    // issue sub as part of the sequence if condition register is to be
+    // preserved.
+    if (NumBytes < 0 && !isHigh && CanChangeCC) {
+      isSub = true;
+      NumBytes = -NumBytes;
+    }
+    unsigned LdReg = DestReg;
+    if (DestReg == ARM::SP) {
+      assert(BaseReg == ARM::SP && "Unexpected!");
+      LdReg = ARM::R3;
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    }
+
+    if (NumBytes <= 255 && NumBytes >= 0)
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+    else if (NumBytes < 0 && NumBytes >= -255) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+        .addReg(LdReg, RegState::Kill);
+    } else
+      MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl);
+
+    // Emit add / sub.
+    int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+    const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
+                                            TII.get(Opc), DestReg);
+    if (DestReg == ARM::SP || isSub)
+      MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+    else
+      MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+    if (DestReg == ARM::SP)
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+                          unsigned NumBits, unsigned Scale) {
+  unsigned NumMIs = 0;
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+  if (Opc == ARM::tADDrSPi) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    NumMIs++;
+    NumBits = 8;
+    Scale = 1;  // Followed by a number of tADDi8.
+    Chunk = ((1 << NumBits) - 1) * Scale;
+  }
+
+  NumMIs += Bytes / Chunk;
+  if ((Bytes % Chunk) != 0)
+    NumMIs++;
+  if (ExtraOpc)
+    NumMIs++;
+  return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator &MBBI,
+                               unsigned DestReg, unsigned BaseReg,
+                               int NumBytes, const TargetInstrInfo &TII,
+                               const Thumb1RegisterInfo& MRI,
+                               DebugLoc dl) {
+  bool isSub = NumBytes < 0;
+  unsigned Bytes = (unsigned)NumBytes;
+  if (isSub) Bytes = -NumBytes;
+  bool isMul4 = (Bytes & 3) == 0;
+  bool isTwoAddr = false;
+  bool DstNotEqBase = false;
+  unsigned NumBits = 1;
+  unsigned Scale = 1;
+  int Opc = 0;
+  int ExtraOpc = 0;
+
+  if (DestReg == BaseReg && BaseReg == ARM::SP) {
+    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+    NumBits = 7;
+    Scale = 4;
+    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+    isTwoAddr = true;
+  } else if (!isSub && BaseReg == ARM::SP) {
+    // r1 = add sp, 403
+    // =>
+    // r1 = add sp, 100 * 4
+    // r1 = add r1, 3
+    if (!isMul4) {
+      Bytes &= ~3;
+      ExtraOpc = ARM::tADDi3;
+    }
+    NumBits = 8;
+    Scale = 4;
+    Opc = ARM::tADDrSPi;
+  } else {
+    // sp = sub sp, c
+    // r1 = sub sp, c
+    // r8 = sub sp, c
+    if (DestReg != BaseReg)
+      DstNotEqBase = true;
+    NumBits = 8;
+    Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+    isTwoAddr = true;
+  }
+
+  unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+  if (NumMIs > Threshold) {
+    // This will expand into too many instructions. Load the immediate from a
+    // constpool entry.
+    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+                             MRI, dl);
+    return;
+  }
+
+  if (DstNotEqBase) {
+    if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+      // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+      unsigned Chunk = (1 << 3) - 1;
+      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+      Bytes -= ThisVal;
+      BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+        .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+        .addReg(BaseReg, RegState::Kill);
+    }
+    BaseReg = DestReg;
+  }
+
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+  while (Bytes) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    ThisVal /= Scale;
+    // Build the new tADD / tSUB.
+    if (isTwoAddr)
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(DestReg).addImm(ThisVal);
+    else {
+      bool isKill = BaseReg != ARM::SP;
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+      BaseReg = DestReg;
+
+      if (Opc == ARM::tADDrSPi) {
+        // r4 = add sp, imm
+        // r4 = add r4, imm
+        // ...
+        NumBits = 8;
+        Scale = 1;
+        Chunk = ((1 << NumBits) - 1) * Scale;
+        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+        isTwoAddr = true;
+      }
+    }
+  }
+
+  if (ExtraOpc)
+    BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+      .addReg(DestReg, RegState::Kill)
+      .addImm(((unsigned)NumBytes) & 3);
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb1RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb1RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+    // ADJCALLSTACKUP   -> add, sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly.  To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      // Replace the pseudo instruction with a new instruction...
+      unsigned Opc = Old->getOpcode();
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+      } else {
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, int Imm,
+                              const TargetInstrInfo &TII,
+                              const Thumb1RegisterInfo& MRI,
+                              DebugLoc dl) {
+  bool isSub = Imm < 0;
+  if (isSub) Imm = -Imm;
+
+  int Chunk = (1 << 8) - 1;
+  int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+  Imm -= ThisVal;
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+  if (Imm > 0)
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+  if (isSub)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+      .addReg(DestReg, RegState::Kill);
+}
+
+void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                             int SPAdj, RegScavenger *RS) const{
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc dl = MI.getDebugLoc();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+               MF.getFrameInfo()->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (hasFP(MF)) {
+    assert(SPAdj == 0 && "Unexpected");
+    // There is alloca()'s in this function, must reference off the frame
+    // pointer instead.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
+
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+  if (Opcode == ARM::tADDrSPi) {
+    Offset += MI.getOperand(i+1).getImm();
+
+    // Can't use tADDrSPi if it's based off the frame pointer.
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    if (FrameReg != ARM::SP) {
+      Opcode = ARM::tADDi3;
+      MI.setDesc(TII.get(ARM::tADDi3));
+      NumBits = 3;
+    } else {
+      NumBits = 8;
+      Scale = 4;
+      assert((Offset & 3) == 0 &&
+             "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+    }
+
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(ARM::tMOVhir2lor));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    }
+
+    // Common case: small offset, fits into instruction.
+    unsigned Mask = (1 << NumBits) - 1;
+    if (((Offset / Scale) & ~Mask) == 0) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+      return;
+    }
+
+    unsigned DestReg = MI.getOperand(0).getReg();
+    unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+    unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+    // MI would expand into a large number of instructions. Don't try to
+    // simplify the immediate.
+    if (NumMIs > 2) {
+      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+                                *this, dl);
+      MBB.erase(II);
+      return;
+    }
+
+    if (Offset > 0) {
+      // Translate r0 = add sp, imm to
+      // r0 = add sp, 255*4
+      // r0 = add r0, (imm - 255*4)
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Mask);
+      Offset = (Offset - Mask * Scale);
+      MachineBasicBlock::iterator NII = next(II);
+      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+                                *this, dl);
+    } else {
+      // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructons)
+      // r0 = add r0, sp
+      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+      MI.setDesc(TII.get(ARM::tADDhirr));
+      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+    }
+    return;
+  } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrModeT1_s: {
+      ImmIdx = i+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+      Scale = 4;
+      break;
+    }
+    default:
+      assert(0 && "Unsupported addressing mode!");
+      abort();
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+
+    // Common case: small offset, fits into instruction.
+    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+    int ImmedOffset = Offset / Scale;
+    unsigned Mask = (1 << NumBits) - 1;
+    if ((unsigned)Offset <= Mask * Scale) {
+      // Replace the FrameIndex with sp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      return;
+    }
+
+    bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+    if (AddrMode == ARMII::AddrModeT1_s) {
+      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+      // a different base register.
+      NumBits = 5;
+      Mask = (1 << NumBits) - 1;
+    }
+    // If this is a thumb spill / restore, we will be using a constpool load to
+    // materialize the offset.
+    if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
+      ImmOp.ChangeToImmediate(0);
+    else {
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  // If we get here, the immediate doesn't fit into the instruction.  We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (Desc.mayLoad()) {
+    // Use the destination register to materialize sp + offset.
+    unsigned TmpReg = MI.getOperand(0).getReg();
+    bool UseRR = false;
+    if (Opcode == ARM::tRestore) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tLDR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)
+      // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tLDR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+  } else if (Desc.mayStore()) {
+    // FIXME! This is horrific!!! We need register scavenging.
+    // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+    // also a ABI register so it's possible that is is the register that is
+    // being storing here. If that's the case, we do the following:
+    // r12 = r2
+    // Use r2 to materialize sp + offset
+    // str r3, r2
+    // r2 = r12
+    unsigned ValReg = MI.getOperand(0).getReg();
+    unsigned TmpReg = ARM::R3;
+    bool UseRR = false;
+    if (ValReg == ARM::R3) {
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R2, RegState::Kill);
+      TmpReg = ARM::R2;
+    }
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    if (Opcode == ARM::tSpill) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tSTR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)  // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else // tSTR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+
+    MachineBasicBlock::iterator NII = next(II);
+    if (ValReg == ARM::R3)
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+        .addReg(ARM::R12, RegState::Kill);
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+  } else
+    assert(false && "Unexpected opcode!");
+}
+
+void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo  *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Check if R3 is live in. It might have to be used as a scratch register.
+  for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
+         E = MF.getRegInfo().livein_end(); I != E; ++I) {
+    if (I->first == ARM::R3) {
+      AFI->setR3IsLiveIn(true);
+      break;
+    }
+  }
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the sizes of each callee-save spill areas and record which frame
+  // belongs to which callee-save spill areas.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    ++MBBI;
+    if (MBBI != MBB.end())
+      dl = MBBI->getDebugLoc();
+  }
+
+  // Darwin ABI requires FP to point to the stack slot that contains the
+  // previous FP.
+  if (STI.isTargetDarwin() || hasFP(MF)) {
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+  }
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+  NumBytes = DPRCSOffset;
+  if (NumBytes) {
+    // Insert it after all the callee-save spills.
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
+  }
+
+  if (STI.isTargetELF() && hasFP(MF)) {
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+  }
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  return (MI->getOpcode() == ARM::tRestore &&
+          MI->getOperand(1).isFI() &&
+          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
+                                      MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert((MBBI->getOpcode() == ARM::tBX_RET ||
+          MBBI->getOpcode() == ARM::tPOP_RET) &&
+         "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
+  } else {
+    // Unwind MBBI to point to first LDR / FLDD.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+      if (!isCSRestore(MBBI, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to start of FP callee save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+
+    if (hasFP(MF)) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on frame pointer only if the stack frame extends beyond
+      // frame pointer stack slot or target is ELF and the function has FP.
+      if (NumBytes)
+        emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+                                  TII, *this, dl);
+      else
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+          .addReg(FramePtr);
+    } else {
+      if (MBBI->getOpcode() == ARM::tBX_RET &&
+          &MBB.front() != MBBI &&
+          prior(MBBI)->getOpcode() == ARM::tPOP) {
+        MachineBasicBlock::iterator PMBBI = prior(MBBI);
+        emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes);
+      } else
+        emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
+    }
+  }
+
+  if (VARegSaveSize) {
+    // Epilogue for vararg functions: pop LR to R3 and branch off it.
+    // FIXME: Verify this is still ok when R3 is no longer being reserved.
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+    MBB.erase(MBBI);
+  }
+}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
new file mode 100644
index 0000000..6d4f1f0
--- /dev/null
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -0,0 +1,60 @@
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB1REGISTERINFO_H
+#define THUMB1REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+  class TargetInstrInfo;
+  class Type;
+
+struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
+public:
+  Thumb1RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  void emitLoadConstPool(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         unsigned DestReg, int Val,
+                         const TargetInstrInfo *TII,
+                         DebugLoc dl) const;
+
+  /// Code Generation virtual methods...
+  const TargetRegisterClass *
+    getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+};
+}
+
+#endif // THUMB1REGISTERINFO_H
diff --git a/lib/Target/ARM/ThumbInstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 075d940..35d09fd 100644
--- a/lib/Target/ARM/ThumbInstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ThumbInstrInfo.cpp - Thumb Instruction Information --------*- C++ -*-===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the Thumb implementation of the TargetInstrInfo class.
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
 //
 //===----------------------------------------------------------------------===//
 
@@ -18,17 +18,17 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/ADT/SmallVector.h"
-#include "ThumbInstrInfo.h"
+#include "Thumb2InstrInfo.h"
 
 using namespace llvm;
 
-ThumbInstrInfo::ThumbInstrInfo(const ARMSubtarget &STI)
-  : ARMBaseInstrInfo(STI) {
+Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI), RI(*this, STI) {
 }
 
-bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI,
-                                 unsigned &SrcReg, unsigned &DstReg,
-                                 unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+bool Thumb2InstrInfo::isMoveInstr(const MachineInstr &MI,
+                                  unsigned &SrcReg, unsigned &DstReg,
+                                  unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
   SrcSubIdx = DstSubIdx = 0; // No sub-registers.
 
   unsigned oc = MI.getOpcode();
@@ -50,8 +50,8 @@ bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI,
   }
 }
 
-unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                             int &FrameIndex) const {
+unsigned Thumb2InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
   switch (MI->getOpcode()) {
   default: break;
   // FIXME: Thumb2
@@ -67,8 +67,8 @@ unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                            int &FrameIndex) const {
+unsigned Thumb2InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
   switch (MI->getOpcode()) {
   default: break;
   // FIXME: Thumb2
@@ -84,11 +84,11 @@ unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   return 0;
 }
 
-bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC) const {
+bool Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   const TargetRegisterClass *DestRC,
+                                   const TargetRegisterClass *SrcRC) const {
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (I != MBB.end()) DL = I->getDebugLoc();
 
@@ -114,7 +114,38 @@ bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
   return false;
 }
 
-void ThumbInstrInfo::
+bool Thumb2InstrInfo::
+canFoldMemoryOperand(const MachineInstr *MI,
+                     const SmallVectorImpl<unsigned> &Ops) const {
+  if (Ops.size() != 1) return false;
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  default: break;
+  case ARM::tMOVr:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVhir2hir: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        return false;
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        return false;
+    }
+    return true;
+  }
+  }
+
+  return false;
+}
+
+void Thumb2InstrInfo::
 storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC) const {
@@ -131,11 +162,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
-void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
-                                    bool isKill,
-                                    SmallVectorImpl<MachineOperand> &Addr,
-                                    const TargetRegisterClass *RC,
-                                   SmallVectorImpl<MachineInstr*> &NewMIs) const{
+void Thumb2InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                     bool isKill,
+                                     SmallVectorImpl<MachineOperand> &Addr,
+                                     const TargetRegisterClass *RC,
+                                     SmallVectorImpl<MachineInstr*> &NewMIs) const{
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
 
@@ -153,7 +184,7 @@ void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   return;
 }
 
-void ThumbInstrInfo::
+void Thumb2InstrInfo::
 loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC) const {
@@ -169,7 +200,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   }
 }
 
-void ThumbInstrInfo::
+void Thumb2InstrInfo::
 loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                 SmallVectorImpl<MachineOperand> &Addr,
                 const TargetRegisterClass *RC,
@@ -189,7 +220,7 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
-bool ThumbInstrInfo::
+bool Thumb2InstrInfo::
 spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI,
                           const std::vector<CalleeSavedInfo> &CSI) const {
@@ -209,7 +240,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
-bool ThumbInstrInfo::
+bool Thumb2InstrInfo::
 restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI,
                             const std::vector<CalleeSavedInfo> &CSI) const {
@@ -240,11 +271,10 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
-MachineInstr *ThumbInstrInfo::
+MachineInstr *Thumb2InstrInfo::
 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                       const SmallVectorImpl<unsigned> &Ops, int FI) const {
   if (Ops.size() != 1) return NULL;
-  const ARMRegisterInfo &RI = getRegisterInfo();
 
   unsigned OpNum = Ops[0];
   unsigned Opc = MI->getOpcode();
@@ -258,7 +288,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     if (OpNum == 0) { // move -> store
       unsigned SrcReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
-      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+      if (RI.isPhysicalRegister(SrcReg) && !isARMLowRegister(SrcReg))
         // tSpill cannot take a high register operand.
         break;
       NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
@@ -266,7 +296,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
         .addFrameIndex(FI).addImm(0);
     } else {          // move -> load
       unsigned DstReg = MI->getOperand(0).getReg();
-      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+      if (RI.isPhysicalRegister(DstReg) && !isARMLowRegister(DstReg))
         // tRestore cannot target a high register operand.
         break;
       bool isDead = MI->getOperand(0).isDead();
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
new file mode 100644
index 0000000..84dcb49
--- /dev/null
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -0,0 +1,93 @@
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2INSTRUCTIONINFO_H
+#define THUMB2INSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class Thumb2InstrInfo : public ARMBaseInstrInfo {
+  Thumb2RegisterInfo RI;
+public:
+  explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
+  /// such, whenever a client has an instance of instruction info, it should
+  /// always be able to get register info as well (through this method).
+  ///
+  const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI) const;
+
+  bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+  unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      int FrameIndex) const;
+
+  MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                      MachineInstr* MI,
+                                      const SmallVectorImpl<unsigned> &Ops,
+                                      MachineInstr* LoadMI) const {
+    return 0;
+  }
+};
+}
+
+#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
new file mode 100644
index 0000000..0f0c0e4
--- /dev/null
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -0,0 +1,755 @@
+//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb2InstrInfo.h"
+#include "Thumb2RegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+static cl::opt<bool>
+Thumb2RegScavenging("enable-thumb2-reg-scavenging",
+                    cl::Hidden,
+                    cl::desc("Enable register scavenging on Thumb-2"));
+
+Thumb2RegisterInfo::Thumb2RegisterInfo(const TargetInstrInfo &tii,
+                                       const ARMSubtarget &sti)
+  : ARMBaseRegisterInfo(tii, sti) {
+}
+
+/// emitLoadConstPool - Emits a load from constpool to materialize the
+/// specified immediate.
+void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           unsigned DestReg, int Val,
+                                           const TargetInstrInfo *TII,
+                                           DebugLoc dl) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  Constant *C = ConstantInt::get(Type::Int32Ty, Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII->get(ARM::tLDRcp), DestReg)
+    .addConstantPoolIndex(Idx);
+}
+
+const TargetRegisterClass*
+Thumb2RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const {
+  if (isARMLowRegister(Reg))
+    return ARM::tGPRRegisterClass;
+  switch (Reg) {
+   default:
+    break;
+   case ARM::R8:  case ARM::R9:  case ARM::R10:  case ARM::R11:
+   case ARM::R12: case ARM::SP:  case ARM::LR:   case ARM::PC:
+    return ARM::GPRRegisterClass;
+  }
+
+  return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
+}
+
+bool
+Thumb2RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+  return Thumb2RegScavenging;
+}
+
+bool Thumb2RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+  const MachineFrameInfo *FFI = MF.getFrameInfo();
+  unsigned CFSize = FFI->getMaxCallFrameSize();
+  // It's not always a good idea to include the call frame as part of the
+  // stack frame. ARM (especially Thumb) has small immediate offset to
+  // address the stack frame. So a large call frame can cause poor codegen
+  // and may even makes it impossible to scavenge a register.
+  if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
+    return false;
+
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
+/// in a register using mov / mvn sequences or load the immediate from a
+/// constpool entry.
+static
+void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, unsigned BaseReg,
+                              int NumBytes, bool CanChangeCC,
+                              const TargetInstrInfo &TII,
+                              const Thumb2RegisterInfo& MRI,
+                              DebugLoc dl) {
+    bool isHigh = !isARMLowRegister(DestReg) ||
+                  (BaseReg != 0 && !isARMLowRegister(BaseReg));
+    bool isSub = false;
+    // Subtract doesn't have high register version. Load the negative value
+    // if either base or dest register is a high register. Also, if do not
+    // issue sub as part of the sequence if condition register is to be
+    // preserved.
+    if (NumBytes < 0 && !isHigh && CanChangeCC) {
+      isSub = true;
+      NumBytes = -NumBytes;
+    }
+    unsigned LdReg = DestReg;
+    if (DestReg == ARM::SP) {
+      assert(BaseReg == ARM::SP && "Unexpected!");
+      LdReg = ARM::R3;
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    }
+
+    if (NumBytes <= 255 && NumBytes >= 0)
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+    else if (NumBytes < 0 && NumBytes >= -255) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg)
+        .addReg(LdReg, RegState::Kill);
+    } else
+      MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, &TII, dl);
+
+    // Emit add / sub.
+    int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr);
+    const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl,
+                                            TII.get(Opc), DestReg);
+    if (DestReg == ARM::SP || isSub)
+      MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill);
+    else
+      MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
+    if (DestReg == ARM::SP)
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+}
+
+/// calcNumMI - Returns the number of instructions required to materialize
+/// the specific add / sub r, c instruction.
+static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes,
+                          unsigned NumBits, unsigned Scale) {
+  unsigned NumMIs = 0;
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+
+  if (Opc == ARM::tADDrSPi) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    NumMIs++;
+    NumBits = 8;
+    Scale = 1;  // Followed by a number of tADDi8.
+    Chunk = ((1 << NumBits) - 1) * Scale;
+  }
+
+  NumMIs += Bytes / Chunk;
+  if ((Bytes % Chunk) != 0)
+    NumMIs++;
+  if (ExtraOpc)
+    NumMIs++;
+  return NumMIs;
+}
+
+/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize
+/// a destreg = basereg + immediate in Thumb code.
+static
+void emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator &MBBI,
+                               unsigned DestReg, unsigned BaseReg,
+                               int NumBytes, const TargetInstrInfo &TII,
+                               const Thumb2RegisterInfo& MRI,
+                               DebugLoc dl) {
+  bool isSub = NumBytes < 0;
+  unsigned Bytes = (unsigned)NumBytes;
+  if (isSub) Bytes = -NumBytes;
+  bool isMul4 = (Bytes & 3) == 0;
+  bool isTwoAddr = false;
+  bool DstNotEqBase = false;
+  unsigned NumBits = 1;
+  unsigned Scale = 1;
+  int Opc = 0;
+  int ExtraOpc = 0;
+
+  if (DestReg == BaseReg && BaseReg == ARM::SP) {
+    assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!");
+    NumBits = 7;
+    Scale = 4;
+    Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
+    isTwoAddr = true;
+  } else if (!isSub && BaseReg == ARM::SP) {
+    // r1 = add sp, 403
+    // =>
+    // r1 = add sp, 100 * 4
+    // r1 = add r1, 3
+    if (!isMul4) {
+      Bytes &= ~3;
+      ExtraOpc = ARM::tADDi3;
+    }
+    NumBits = 8;
+    Scale = 4;
+    Opc = ARM::tADDrSPi;
+  } else {
+    // sp = sub sp, c
+    // r1 = sub sp, c
+    // r8 = sub sp, c
+    if (DestReg != BaseReg)
+      DstNotEqBase = true;
+    NumBits = 8;
+    Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+    isTwoAddr = true;
+  }
+
+  unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale);
+  unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2;
+  if (NumMIs > Threshold) {
+    // This will expand into too many instructions. Load the immediate from a
+    // constpool entry.
+    emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII,
+                             MRI, dl);
+    return;
+  }
+
+  if (DstNotEqBase) {
+    if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) {
+      // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7)
+      unsigned Chunk = (1 << 3) - 1;
+      unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+      Bytes -= ThisVal;
+      BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg)
+        .addReg(BaseReg, RegState::Kill).addImm(ThisVal);
+    } else {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+        .addReg(BaseReg, RegState::Kill);
+    }
+    BaseReg = DestReg;
+  }
+
+  unsigned Chunk = ((1 << NumBits) - 1) * Scale;
+  while (Bytes) {
+    unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
+    Bytes -= ThisVal;
+    ThisVal /= Scale;
+    // Build the new tADD / tSUB.
+    if (isTwoAddr)
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(DestReg).addImm(ThisVal);
+    else {
+      bool isKill = BaseReg != ARM::SP;
+      BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
+        .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal);
+      BaseReg = DestReg;
+
+      if (Opc == ARM::tADDrSPi) {
+        // r4 = add sp, imm
+        // r4 = add r4, imm
+        // ...
+        NumBits = 8;
+        Scale = 1;
+        Chunk = ((1 << NumBits) - 1) * Scale;
+        Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8;
+        isTwoAddr = true;
+      }
+    }
+  }
+
+  if (ExtraOpc)
+    BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg)
+      .addReg(DestReg, RegState::Kill)
+      .addImm(((unsigned)NumBytes) & 3);
+}
+
+static void emitSPUpdate(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         const TargetInstrInfo &TII, DebugLoc dl,
+                         const Thumb2RegisterInfo &MRI,
+                         int NumBytes) {
+  emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII,
+                            MRI, dl);
+}
+
+void Thumb2RegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  if (!hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+    // ADJCALLSTACKUP   -> add, sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly.  To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      // Replace the pseudo instruction with a new instruction...
+      unsigned Opc = Old->getOpcode();
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        emitSPUpdate(MBB, I, TII, dl, *this, -Amount);
+      } else {
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(MBB, I, TII, dl, *this, Amount);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+/// emitThumbConstant - Emit a series of instructions to materialize a
+/// constant.
+static void emitThumbConstant(MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              unsigned DestReg, int Imm,
+                              const TargetInstrInfo &TII,
+                              const Thumb2RegisterInfo& MRI,
+                              DebugLoc dl) {
+  bool isSub = Imm < 0;
+  if (isSub) Imm = -Imm;
+
+  int Chunk = (1 << 8) - 1;
+  int ThisVal = (Imm > Chunk) ? Chunk : Imm;
+  Imm -= ThisVal;
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal);
+  if (Imm > 0)
+    emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl);
+  if (isSub)
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg)
+      .addReg(DestReg, RegState::Kill);
+}
+
+void Thumb2RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                             int SPAdj, RegScavenger *RS) const{
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  DebugLoc dl = MI.getDebugLoc();
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  unsigned FrameReg = ARM::SP;
+  int FrameIndex = MI.getOperand(i).getIndex();
+  int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+               MF.getFrameInfo()->getStackSize() + SPAdj;
+
+  if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea1Offset();
+  else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+    Offset -= AFI->getGPRCalleeSavedArea2Offset();
+  else if (hasFP(MF)) {
+    assert(SPAdj == 0 && "Unexpected");
+    // There is alloca()'s in this function, must reference off the frame
+    // pointer instead.
+    FrameReg = getFrameRegister(MF);
+    Offset -= AFI->getFramePtrSpillOffset();
+  }
+
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = MI.getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+
+  if (Opcode == ARM::tADDrSPi) {
+    Offset += MI.getOperand(i+1).getImm();
+
+    // Can't use tADDrSPi if it's based off the frame pointer.
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    if (FrameReg != ARM::SP) {
+      Opcode = ARM::tADDi3;
+      MI.setDesc(TII.get(ARM::tADDi3));
+      NumBits = 3;
+    } else {
+      NumBits = 8;
+      Scale = 4;
+      assert((Offset & 3) == 0 &&
+             "Thumb add/sub sp, #imm immediate must be multiple of 4!");
+    }
+
+    if (Offset == 0) {
+      // Turn it into a move.
+      MI.setDesc(TII.get(ARM::tMOVhir2lor));
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.RemoveOperand(i+1);
+      return;
+    }
+
+    // Common case: small offset, fits into instruction.
+    unsigned Mask = (1 << NumBits) - 1;
+    if (((Offset / Scale) & ~Mask) == 0) {
+      // Replace the FrameIndex with sp / fp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+      return;
+    }
+
+    unsigned DestReg = MI.getOperand(0).getReg();
+    unsigned Bytes = (Offset > 0) ? Offset : -Offset;
+    unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale);
+    // MI would expand into a large number of instructions. Don't try to
+    // simplify the immediate.
+    if (NumMIs > 2) {
+      emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
+                                *this, dl);
+      MBB.erase(II);
+      return;
+    }
+
+    if (Offset > 0) {
+      // Translate r0 = add sp, imm to
+      // r0 = add sp, 255*4
+      // r0 = add r0, (imm - 255*4)
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      MI.getOperand(i+1).ChangeToImmediate(Mask);
+      Offset = (Offset - Mask * Scale);
+      MachineBasicBlock::iterator NII = next(II);
+      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
+                                *this, dl);
+    } else {
+      // Translate r0 = add sp, -imm to
+      // r0 = -imm (this is then translated into a series of instructons)
+      // r0 = add r0, sp
+      emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
+      MI.setDesc(TII.get(ARM::tADDhirr));
+      MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
+      MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+    }
+    return;
+  } else {
+    unsigned ImmIdx = 0;
+    int InstrOffs = 0;
+    unsigned NumBits = 0;
+    unsigned Scale = 1;
+    switch (AddrMode) {
+    case ARMII::AddrModeT1_s: {
+      ImmIdx = i+1;
+      InstrOffs = MI.getOperand(ImmIdx).getImm();
+      NumBits = (FrameReg == ARM::SP) ? 8 : 5;
+      Scale = 4;
+      break;
+    }
+    default:
+      assert(0 && "Unsupported addressing mode!");
+      abort();
+      break;
+    }
+
+    Offset += InstrOffs * Scale;
+    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
+
+    // Common case: small offset, fits into instruction.
+    MachineOperand &ImmOp = MI.getOperand(ImmIdx);
+    int ImmedOffset = Offset / Scale;
+    unsigned Mask = (1 << NumBits) - 1;
+    if ((unsigned)Offset <= Mask * Scale) {
+      // Replace the FrameIndex with sp
+      MI.getOperand(i).ChangeToRegister(FrameReg, false);
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      return;
+    }
+
+    bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
+    if (AddrMode == ARMII::AddrModeT1_s) {
+      // Thumb tLDRspi, tSTRspi. These will change to instructions that use
+      // a different base register.
+      NumBits = 5;
+      Mask = (1 << NumBits) - 1;
+    }
+    // If this is a thumb spill / restore, we will be using a constpool load to
+    // materialize the offset.
+    if (AddrMode == ARMII::AddrModeT1_s && isThumSpillRestore)
+      ImmOp.ChangeToImmediate(0);
+    else {
+      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
+      ImmedOffset = ImmedOffset & Mask;
+      ImmOp.ChangeToImmediate(ImmedOffset);
+      Offset &= ~(Mask*Scale);
+    }
+  }
+
+  // If we get here, the immediate doesn't fit into the instruction.  We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (Desc.mayLoad()) {
+    // Use the destination register to materialize sp + offset.
+    unsigned TmpReg = MI.getOperand(0).getReg();
+    bool UseRR = false;
+    if (Opcode == ARM::tRestore) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tLDR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)
+      // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else  // tLDR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+  } else if (Desc.mayStore()) {
+    // FIXME! This is horrific!!! We need register scavenging.
+    // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+    // also a ABI register so it's possible that is is the register that is
+    // being storing here. If that's the case, we do the following:
+    // r12 = r2
+    // Use r2 to materialize sp + offset
+    // str r3, r2
+    // r2 = r12
+    unsigned ValReg = MI.getOperand(0).getReg();
+    unsigned TmpReg = ARM::R3;
+    bool UseRR = false;
+    if (ValReg == ARM::R3) {
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R2, RegState::Kill);
+      TmpReg = ARM::R2;
+    }
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+        .addReg(ARM::R3, RegState::Kill);
+    if (Opcode == ARM::tSpill) {
+      if (FrameReg == ARM::SP)
+        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                 Offset, false, TII, *this, dl);
+      else {
+        emitLoadConstPool(MBB, II, TmpReg, Offset, &TII, dl);
+        UseRR = true;
+      }
+    } else
+      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                *this, dl);
+    MI.setDesc(TII.get(ARM::tSTR));
+    MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+    if (UseRR)  // Use [reg, reg] addrmode.
+      MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+    else // tSTR has an extra register operand.
+      MI.addOperand(MachineOperand::CreateReg(0, false));
+
+    MachineBasicBlock::iterator NII = next(II);
+    if (ValReg == ARM::R3)
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+        .addReg(ARM::R12, RegState::Kill);
+    if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+      BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+        .addReg(ARM::R12, RegState::Kill);
+  } else
+    assert(false && "Unexpected opcode!");
+}
+
+void Thumb2RegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo  *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  // Check if R3 is live in. It might have to be used as a scratch register.
+  for (MachineRegisterInfo::livein_iterator I =MF.getRegInfo().livein_begin(),
+         E = MF.getRegInfo().livein_end(); I != E; ++I) {
+    if (I->first == ARM::R3) {
+      AFI->setR3IsLiveIn(true);
+      break;
+    }
+  }
+
+  // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+  NumBytes = (NumBytes + 3) & ~3;
+  MFI->setStackSize(NumBytes);
+
+  // Determine the sizes of each callee-save spill areas and record which frame
+  // belongs to which callee-save spill areas.
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
+  int FramePtrSpillFI = 0;
+
+  if (VARegSaveSize)
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, -VARegSaveSize);
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
+    return;
+  }
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    int FI = CSI[i].getFrameIdx();
+    switch (Reg) {
+    case ARM::R4:
+    case ARM::R5:
+    case ARM::R6:
+    case ARM::R7:
+    case ARM::LR:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      AFI->addGPRCalleeSavedArea1Frame(FI);
+      GPRCS1Size += 4;
+      break;
+    case ARM::R8:
+    case ARM::R9:
+    case ARM::R10:
+    case ARM::R11:
+      if (Reg == FramePtr)
+        FramePtrSpillFI = FI;
+      if (STI.isTargetDarwin()) {
+        AFI->addGPRCalleeSavedArea2Frame(FI);
+        GPRCS2Size += 4;
+      } else {
+        AFI->addGPRCalleeSavedArea1Frame(FI);
+        GPRCS1Size += 4;
+      }
+      break;
+    default:
+      AFI->addDPRCalleeSavedAreaFrame(FI);
+      DPRCSSize += 8;
+    }
+  }
+
+  if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+    ++MBBI;
+    if (MBBI != MBB.end())
+      dl = MBBI->getDebugLoc();
+  }
+
+  // Darwin ABI requires FP to point to the stack slot that contains the
+  // previous FP.
+  if (STI.isTargetDarwin() || hasFP(MF)) {
+    MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
+      .addFrameIndex(FramePtrSpillFI).addImm(0);
+  }
+
+  // Determine starting offsets of spill areas.
+  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
+  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
+  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
+  AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes);
+  AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
+  AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
+  AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
+
+  NumBytes = DPRCSOffset;
+  if (NumBytes) {
+    // Insert it after all the callee-save spills.
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
+  }
+
+  if (STI.isTargetELF() && hasFP(MF)) {
+    MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
+                             AFI->getFramePtrSpillOffset());
+  }
+
+  AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
+  AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+}
+
+static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+  return (MI->getOpcode() == ARM::tRestore &&
+          MI->getOperand(1).isFI() &&
+          isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs));
+}
+
+void Thumb2RegisterInfo::emitEpilogue(MachineFunction &MF,
+                                      MachineBasicBlock &MBB) const {
+  MachineBasicBlock::iterator MBBI = prior(MBB.end());
+  assert((MBBI->getOpcode() == ARM::tBX_RET ||
+          MBBI->getOpcode() == ARM::tPOP_RET) &&
+         "Can only insert epilog into returning blocks");
+  DebugLoc dl = MBBI->getDebugLoc();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  int NumBytes = (int)MFI->getStackSize();
+
+  if (!AFI->hasStackFrame()) {
+    if (NumBytes != 0)
+      emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
+  } else {
+    // Unwind MBBI to point to first LDR / FLDD.
+    const unsigned *CSRegs = getCalleeSavedRegs();
+    if (MBBI != MBB.begin()) {
+      do
+        --MBBI;
+      while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs));
+      if (!isCSRestore(MBBI, CSRegs))
+        ++MBBI;
+    }
+
+    // Move SP to start of FP callee save spill area.
+    NumBytes -= (AFI->getGPRCalleeSavedArea1Size() +
+                 AFI->getGPRCalleeSavedArea2Size() +
+                 AFI->getDPRCalleeSavedAreaSize());
+
+    if (hasFP(MF)) {
+      NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
+      // Reset SP based on frame pointer only if the stack frame extends beyond
+      // frame pointer stack slot or target is ELF and the function has FP.
+      if (NumBytes)
+        emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes,
+                                  TII, *this, dl);
+      else
+        BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP)
+          .addReg(FramePtr);
+    } else {
+      if (MBBI->getOpcode() == ARM::tBX_RET &&
+          &MBB.front() != MBBI &&
+          prior(MBBI)->getOpcode() == ARM::tPOP) {
+        MachineBasicBlock::iterator PMBBI = prior(MBBI);
+        emitSPUpdate(MBB, PMBBI, TII, dl, *this, NumBytes);
+      } else
+        emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
+    }
+  }
+
+  if (VARegSaveSize) {
+    // Epilogue for vararg functions: pop LR to R3 and branch off it.
+    // FIXME: Verify this is still ok when R3 is no longer being reserved.
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3);
+
+    emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3);
+    MBB.erase(MBBI);
+  }
+}
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
new file mode 100644
index 0000000..d379c31
--- /dev/null
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -0,0 +1,60 @@
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2REGISTERINFO_H
+#define THUMB2REGISTERINFO_H
+
+#include "ARM.h"
+#include "ARMRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+  class TargetInstrInfo;
+  class Type;
+
+struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
+public:
+  Thumb2RegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  void emitLoadConstPool(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator &MBBI,
+                         unsigned DestReg, int Val,
+                         const TargetInstrInfo *TII,
+                         DebugLoc dl) const;
+
+  /// Code Generation virtual methods...
+  const TargetRegisterClass *
+    getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
+
+  bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+  bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  bool hasReservedCallFrame(MachineFunction &MF) const;
+
+  void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const;
+
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           int SPAdj, RegScavenger *RS = NULL) const;
+
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+};
+}
+
+#endif // THUMB2REGISTERINFO_H
diff --git a/lib/Target/ARM/ThumbInstrInfo.h b/lib/Target/ARM/ThumbInstrInfo.h
deleted file mode 100644
index dcf1095..0000000
--- a/lib/Target/ARM/ThumbInstrInfo.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- ThumbInstrInfo.h - Thumb Instruction Information ----------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef THUMBINSTRUCTIONINFO_H
-#define THUMBINSTRUCTIONINFO_H
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARM.h"
-#include "ARMInstrInfo.h"
-
-namespace llvm {
-  class ARMSubtarget;
-
-class ThumbInstrInfo : public ARMBaseInstrInfo {
-public:
-  explicit ThumbInstrInfo(const ARMSubtarget &STI);
-
-  /// Return true if the instruction is a register to register move and return
-  /// the source and dest operands and their sub-register indices by reference.
-  virtual bool isMoveInstr(const MachineInstr &MI,
-                           unsigned &SrcReg, unsigned &DstReg,
-                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
-  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
-                                       int &FrameIndex) const;
-  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
-                                      int &FrameIndex) const;
-
-  virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator I,
-                            unsigned DestReg, unsigned SrcReg,
-                            const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC) const;
-  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI,
-                                   unsigned SrcReg, bool isKill, int FrameIndex,
-                                   const TargetRegisterClass *RC) const;
-
-  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
-                              SmallVectorImpl<MachineOperand> &Addr,
-                              const TargetRegisterClass *RC,
-                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
-  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator MBBI,
-                                    unsigned DestReg, int FrameIndex,
-                                    const TargetRegisterClass *RC) const;
-
-  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
-                               SmallVectorImpl<MachineOperand> &Addr,
-                               const TargetRegisterClass *RC,
-                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                           const SmallVectorImpl<unsigned> &Ops,
-                                              MachineInstr* LoadMI) const {
-    return 0;
-  }
-
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                           const SmallVectorImpl<unsigned> &Ops,
-                                              int FrameIndex) const;
-};
-}
-
-#endif // THUMBINSTRUCTIONINFO_H
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
index 2815176..0818e25 100644
--- a/lib/Target/Alpha/Alpha.h
+++ b/lib/Target/Alpha/Alpha.h
@@ -27,7 +27,6 @@ namespace llvm {
   FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
   FunctionPass *createAlphaCodePrinterPass(raw_ostream &OS,
                                            TargetMachine &TM,
-                                           CodeGenOpt::Level OptLevel,
                                            bool Verbose);
   FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
   FunctionPass *createAlphaCodeEmitterPass(AlphaTargetMachine &TM,
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 1be1713..fa0b656 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -181,6 +181,11 @@ const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned AlphaTargetLowering::getFunctionAlignment(const Function *F) const {
+  return 4;
+}
+
 static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
   MVT PtrVT = Op.getValueType();
   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
@@ -360,7 +365,8 @@ static SDValue LowerRET(SDValue Op, SelectionDAG &DAG) {
 std::pair<SDValue, SDValue>
 AlphaTargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, 
                                  bool RetSExt, bool RetZExt, bool isVarArg,
-                                 bool isInreg, unsigned CallingConv, 
+                                 bool isInreg, unsigned NumFixedArgs,
+                                 unsigned CallingConv, 
                                  bool isTailCall, SDValue Callee, 
                                  ArgListTy &Args, SelectionDAG &DAG,
                                  DebugLoc dl) {
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index fdd817c..4925367 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -86,9 +86,9 @@ namespace llvm {
     /// actual call.
     virtual std::pair<SDValue, SDValue>
     LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
-                bool isVarArg, bool isInreg, unsigned CC, bool isTailCall, 
-                SDValue Callee, ArgListTy &Args, SelectionDAG &DAG, 
-                DebugLoc dl);
+                bool isVarArg, bool isInreg, unsigned NumFixedArgs, unsigned CC,
+                bool isTailCall, SDValue Callee, ArgListTy &Args,
+                SelectionDAG &DAG, DebugLoc dl);
 
     ConstraintType getConstraintType(const std::string &Constraint) const;
 
@@ -103,6 +103,9 @@ namespace llvm {
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
   private:
     // Helpers for custom lowering.
     void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 229f9d4..76a594f 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -289,19 +289,22 @@ MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
        if (Ops[0] == 0) {  // move -> store
          unsigned InReg = MI->getOperand(1).getReg();
          bool isKill = MI->getOperand(1).isKill();
+         bool isUndef = MI->getOperand(1).isUndef();
          Opc = (Opc == Alpha::BISr) ? Alpha::STQ : 
            ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
          NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
-           .addReg(InReg, getKillRegState(isKill))
+           .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef))
            .addFrameIndex(FrameIndex)
            .addReg(Alpha::F31);
        } else {           // load -> move
          unsigned OutReg = MI->getOperand(0).getReg();
          bool isDead = MI->getOperand(0).isDead();
+         bool isUndef = MI->getOperand(0).isUndef();
          Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : 
            ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
          NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
-           .addReg(OutReg, RegState::Define | getDeadRegState(isDead))
+           .addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
+                   getUndefRegState(isUndef))
            .addFrameIndex(FrameIndex)
            .addReg(Alpha::F31);
        }
@@ -470,6 +473,7 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
                               &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
   assert(Ok && "Couldn't assign to global base register!");
+  Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Alpha::R29);
 
   AlphaFI->setGlobalBaseReg(GlobalBaseReg);
@@ -496,6 +500,7 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
   bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
                               &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
   assert(Ok && "Couldn't assign to global return address register!");
+  Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Alpha::R26);
 
   AlphaFI->setGlobalRetAddr(GlobalRetAddr);
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 10952eb..060089c 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -94,7 +94,7 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
 bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
@@ -104,7 +104,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
   return false;
 }
@@ -115,7 +115,7 @@ bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
   return false;
 }
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 946ca55..26684c7 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -41,7 +41,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             TargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index e0c0a64..982ef5e 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -38,9 +38,8 @@ namespace {
     ///
 
     explicit AlphaAsmPrinter(raw_ostream &o, TargetMachine &tm,
-                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                             bool V)
-      : AsmPrinter(o, tm, T, OL, V) {}
+                             const TargetAsmInfo *T, bool V)
+      : AsmPrinter(o, tm, T, V) {}
 
     virtual const char *getPassName() const {
       return "Alpha Assembly Printer";
@@ -70,9 +69,8 @@ namespace {
 ///
 FunctionPass *llvm::createAlphaCodePrinterPass(raw_ostream &o,
                                                TargetMachine &tm,
-                                               CodeGenOpt::Level OptLevel,
                                                bool verbose) {
-  return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new AlphaAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 #include "AlphaGenAsmWriter.inc"
@@ -155,7 +153,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   const Function *F = MF.getFunction();
   SwitchToSection(TAI->SectionForGlobal(F));
 
-  EmitAlignment(4, F);
+  EmitAlignment(MF.getAlignment(), F);
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:  // Symbols default to internal.
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index 1e535f7..2a382d5 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -23,3 +23,5 @@ add_llvm_target(AlphaCodeGen
   AlphaTargetAsmInfo.cpp
   AlphaTargetMachine.cpp
   )
+
+target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 02b625b..2847d0b 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -50,9 +50,8 @@ namespace {
     std::set<std::string> FnStubs, GVStubs;
   public:
     explicit SPUAsmPrinter(raw_ostream &O, TargetMachine &TM,
-                           const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                           bool V) :
-      AsmPrinter(O, TM, T, OL, V) {}
+                           const TargetAsmInfo *T, bool V) :
+      AsmPrinter(O, TM, T, V) {}
 
     virtual const char *getPassName() const {
       return "STI CBEA SPU Assembly Printer";
@@ -290,9 +289,8 @@ namespace {
     DwarfWriter *DW;
   public:
     explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
-                             const TargetAsmInfo *T, CodeGenOpt::Level F,
-                             bool V)
-      : SPUAsmPrinter(O, TM, T, F, V), DW(0) {}
+                             const TargetAsmInfo *T, bool V)
+      : SPUAsmPrinter(O, TM, T, V), DW(0) {}
 
     virtual const char *getPassName() const {
       return "STI CBEA SPU Assembly Printer";
@@ -433,7 +431,7 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
   const Function *F = MF.getFunction();
 
   SwitchToSection(TAI->SectionForGlobal(F));
-  EmitAlignment(3, F);
+  EmitAlignment(MF.getAlignment(), F);
 
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
@@ -603,9 +601,8 @@ bool LinuxAsmPrinter::doFinalization(Module &M) {
 ///
 FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
                                             SPUTargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose) {
-  return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 // Force static initialization.
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index e3e12ac..8a55845 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -22,3 +22,5 @@ add_llvm_target(CellSPUCodeGen
   SPUTargetAsmInfo.cpp
   SPUTargetMachine.cpp
   )
+
+target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 77a062e..10d1110 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -26,7 +26,6 @@ namespace llvm {
   FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
   FunctionPass *createSPUAsmPrinterPass(raw_ostream &o,
                                         SPUTargetMachine &tm,
-                                        CodeGenOpt::Level OptLevel,
                                         bool verbose);
 
   /*--== Utility functions/predicates/etc used all over the place: --==*/
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index b286443..d8a7776 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -113,7 +113,7 @@ namespace {
     const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
     std::pair<SDValue, SDValue> CallInfo =
             TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
-                            CallingConv::C, false, Callee, Args, DAG,
+                            0, CallingConv::C, false, Callee, Args, DAG,
                             Op.getDebugLoc());
 
     return CallInfo.first;
@@ -481,6 +481,11 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
   return ((i != node_names.end()) ? i->second : 0);
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
+  return 3;
+}
+
 //===----------------------------------------------------------------------===//
 // Return the Cell SPU's SETCC result type
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 866c632..b1583f4 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -89,7 +89,6 @@ namespace llvm {
     public TargetLowering
   {
     int VarArgsFrameIndex;            // FrameIndex for start of varargs area.
-    int ReturnAddrIndex;              // FrameIndex for return slot.
     SPUTargetMachine &SPUTM;
 
   public:
@@ -148,6 +147,9 @@ namespace llvm {
     virtual bool isLegalAddressImmediate(GlobalValue *) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   };
 }
 
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 4af995a..e629c8d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -491,19 +491,22 @@ SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (OpNum == 0) {  // move -> store
       unsigned InReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
         MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
                                           get(SPU::STQDr32));
 
-        MIB.addReg(InReg, getKillRegState(isKill));
+        MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef));
         NewMI = addFrameReference(MIB, FrameIndex);
       }
     } else {           // move -> load
       unsigned OutReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc));
 
-      MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead));
+      MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
+                 getUndefRegState(isUndef));
       Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset())
         ? SPU::STQDr32 : SPU::STQXr32;
       NewMI = addFrameReference(MIB, FrameIndex);
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 256d63d..2470972 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -94,6 +94,6 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index d8fe300..4c28521 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -43,7 +43,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             SPUTargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index 6b34a4e..d85c0ea 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -38,9 +38,8 @@ namespace {
     std::set<std::string> ExternalFunctionNames, ExternalObjectNames;
   public:
     explicit IA64AsmPrinter(raw_ostream &O, TargetMachine &TM,
-                            const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                            bool V)
-      : AsmPrinter(O, TM, T, OL, V) {}
+                            const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V) {}
 
     virtual const char *getPassName() const {
       return "IA64 Assembly Printer";
@@ -137,7 +136,7 @@ bool IA64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   SwitchToSection(TAI->SectionForGlobal(F));
 
   // Print out labels for the function.
-  EmitAlignment(5);
+  EmitAlignment(MF.getAlignment());
   O << "\t.global\t" << CurrentFnName << '\n';
 
   printVisibility(CurrentFnName, F->getVisibility());
@@ -373,9 +372,8 @@ bool IA64AsmPrinter::doFinalization(Module &M) {
 ///
 FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
                                               IA64TargetMachine &tm,
-                                              CodeGenOpt::Level OptLevel,
                                               bool verbose) {
-  return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 namespace {
diff --git a/lib/Target/IA64/CMakeLists.txt b/lib/Target/IA64/CMakeLists.txt
index 26f86ca..638ed2e 100644
--- a/lib/Target/IA64/CMakeLists.txt
+++ b/lib/Target/IA64/CMakeLists.txt
@@ -18,3 +18,5 @@ add_llvm_target(IA64CodeGen
   IA64TargetAsmInfo.cpp
   IA64TargetMachine.cpp
   )
+
+target_link_libraries (LLVMIA64CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/IA64/IA64.h b/lib/Target/IA64/IA64.h
index ec8e3d6..9c758fd 100644
--- a/lib/Target/IA64/IA64.h
+++ b/lib/Target/IA64/IA64.h
@@ -39,7 +39,6 @@ FunctionPass *createIA64BundlingPass(IA64TargetMachine &TM);
 ///
 FunctionPass *createIA64CodePrinterPass(raw_ostream &o,
                                         IA64TargetMachine &tm,
-                                        CodeGenOpt::Level OptLevel,
                                         bool verbose);
 
 } // End llvm namespace
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
index c545b9c..c622345 100644
--- a/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -148,6 +148,11 @@ MVT IA64TargetLowering::getSetCCResultType(MVT VT) const {
   return MVT::i1;
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned IA64TargetLowering::getFunctionAlignment(const Function *) const {
+  return 5;
+}
+
 void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
                                         SmallVectorImpl<SDValue> &ArgValues,
                                         DebugLoc dl) {
@@ -310,7 +315,8 @@ void IA64TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG,
 std::pair<SDValue, SDValue>
 IA64TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
                                 bool RetSExt, bool RetZExt, bool isVarArg,
-                                bool isInreg, unsigned CallingConv, 
+                                bool isInreg, unsigned NumFixedArgs,
+                                unsigned CallingConv, 
                                 bool isTailCall, SDValue Callee, 
                                 ArgListTy &Args, SelectionDAG &DAG,
                                 DebugLoc dl) {
diff --git a/lib/Target/IA64/IA64ISelLowering.h b/lib/Target/IA64/IA64ISelLowering.h
index edf7eb8..b9c8bf2 100644
--- a/lib/Target/IA64/IA64ISelLowering.h
+++ b/lib/Target/IA64/IA64ISelLowering.h
@@ -62,7 +62,7 @@ namespace llvm {
     virtual std::pair<SDValue, SDValue>
       LowerCallTo(SDValue Chain, const Type *RetTy,
                   bool RetSExt, bool RetZExt, bool isVarArg, bool isInreg,
-                  unsigned CC, bool isTailCall, 
+                  unsigned NumFixedArgs, unsigned CC, bool isTailCall, 
                   SDValue Callee, ArgListTy &Args, SelectionDAG &DAG,
                   DebugLoc dl);
 
@@ -70,6 +70,8 @@ namespace llvm {
     /// (currently, only "ret void")
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
     
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   };
 }
 
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 4b05e1d..71a0a98 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -73,7 +73,7 @@ IA64TargetMachine::IA64TargetMachine(const Module &M, const std::string &FS)
 //===----------------------------------------------------------------------===//
 
 bool IA64TargetMachine::addInstSelector(PassManagerBase &PM,
-                                        CodeGenOpt::Level OptLevel){
+                                        CodeGenOpt::Level OptLevel) {
   PM.add(createIA64DAGToDAGInstructionSelector(*this));
   return false;
 }
@@ -91,7 +91,7 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
 
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
index a64da9f..402d833 100644
--- a/lib/Target/IA64/IA64TargetMachine.h
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -38,7 +38,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             IA64TargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index ed0cd04..fc13c9e 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -26,7 +26,6 @@ namespace llvm {
                                     CodeGenOpt::Level OptLevel);
   FunctionPass *createMSP430CodePrinterPass(raw_ostream &o,
                                             MSP430TargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
 } // end namespace llvm;
 
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 71b785b..b1fa3f0 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -40,9 +40,8 @@ namespace {
   class VISIBILITY_HIDDEN MSP430AsmPrinter : public AsmPrinter {
   public:
     MSP430AsmPrinter(raw_ostream &O, MSP430TargetMachine &TM,
-                     const TargetAsmInfo *TAI,
-                     CodeGenOpt::Level OL, bool V)
-      : AsmPrinter(O, TM, TAI, OL, V) {}
+                     const TargetAsmInfo *TAI, bool V)
+      : AsmPrinter(O, TM, TAI, V) {}
 
     virtual const char *getPassName() const {
       return "MSP430 Assembly Printer";
@@ -77,9 +76,8 @@ namespace {
 ///
 FunctionPass *llvm::createMSP430CodePrinterPass(raw_ostream &o,
                                                 MSP430TargetMachine &tm,
-                                                CodeGenOpt::Level OptLevel,
                                                 bool verbose) {
-  return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new MSP430AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 bool MSP430AsmPrinter::doInitialization(Module &M) {
@@ -97,10 +95,7 @@ void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
 
   SwitchToSection(TAI->SectionForGlobal(F));
 
-  unsigned FnAlign = 4;
-  if (F->hasFnAttr(Attribute::OptimizeForSize))
-    FnAlign = 1;
-
+  unsigned FnAlign = MF.getAlignment();
   EmitAlignment(FnAlign, F);
 
   switch (F->getLinkage()) {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 14db20e..91a8663 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -127,6 +127,11 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   }
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const {
+  return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+}
+
 //===----------------------------------------------------------------------===//
 //                      Calling Convention Implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 404534d..4a90a0e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -74,6 +74,9 @@ namespace llvm {
     /// DAG node.
     virtual const char *getTargetNodeName(unsigned Opcode) const;
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
     SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG);
     SDValue LowerCALL(SDValue Op, SelectionDAG &DAG);
     SDValue LowerRET(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index ef6f997..d40bac7 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -350,6 +350,7 @@ unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
 
 int MSP430RegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   assert(0 && "Not implemented yet!");
+  return 0;
 }
 
 #include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index b1fe758..dd09d43 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -62,7 +62,7 @@ bool MSP430TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                              bool Verbose,
                                              raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createMSP430CodePrinterPass(Out, *this, OptLevel, Verbose));
+  PM.add(createMSP430CodePrinterPass(Out, *this, Verbose));
   return false;
 }
 
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 431630b..cb40479 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -51,9 +51,8 @@ namespace {
     const MipsSubtarget *Subtarget;
   public:
     explicit MipsAsmPrinter(raw_ostream &O, MipsTargetMachine &TM, 
-                            const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                            bool V)
-      : AsmPrinter(O, TM, T, OL, V) {
+                            const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V) {
       Subtarget = &TM.getSubtarget<MipsSubtarget>();
     }
 
@@ -93,9 +92,8 @@ namespace {
 /// regardless of whether the function is in SSA form.
 FunctionPass *llvm::createMipsCodePrinterPass(raw_ostream &o,
                                               MipsTargetMachine &tm,
-                                              CodeGenOpt::Level OptLevel,
                                               bool verbose) {
-  return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new MipsAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 //===----------------------------------------------------------------------===//
@@ -230,7 +228,7 @@ emitFunctionStart(MachineFunction &MF)
   SwitchToSection(TAI->SectionForGlobal(F));
 
   // 2 bits aligned
-  EmitAlignment(2, F);
+  EmitAlignment(MF.getAlignment(), F);
 
   O << "\t.globl\t"  << CurrentFnName << '\n';
   O << "\t.ent\t"    << CurrentFnName << '\n';
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 70c7a51..d27e6f1 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -20,3 +20,5 @@ add_llvm_target(MipsCodeGen
   MipsTargetAsmInfo.cpp
   MipsTargetMachine.cpp
   )
+
+target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 0accb4e..9b22a91 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -27,7 +27,6 @@ namespace llvm {
   FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
   FunctionPass *createMipsCodePrinterPass(raw_ostream &OS, 
                                           MipsTargetMachine &TM,
-                                          CodeGenOpt::Level OptLevel,
                                           bool Verbose);
 } // end namespace llvm;
 
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 42afceb..3d2e2b7 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -154,11 +154,14 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
   computeRegisterProperties();
 }
 
-
 MVT MipsTargetLowering::getSetCCResultType(MVT VT) const {
   return MVT::i32;
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const {
+  return 2;
+}
 
 SDValue MipsTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) 
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 55cd6ea..9ad4895 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -68,8 +68,6 @@ namespace llvm {
   //===--------------------------------------------------------------------===//
   class MipsTargetLowering : public TargetLowering 
   {
-    // FrameIndex for return slot.
-    int ReturnAddrIndex;
   public:
 
     explicit MipsTargetLowering(MipsTargetMachine &TM);
@@ -84,6 +82,8 @@ namespace llvm {
     /// getSetCCResultType - get the ISD::SETCC result ValueType
     MVT getSetCCResultType(MVT VT) const;
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   private:
     // Subtarget Info
     const MipsSubtarget *Subtarget;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 92af973..e16fd8e 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -291,14 +291,17 @@ foldMemoryOperandImpl(MachineFunction &MF,
       if (Ops[0] == 0) {    // COPY -> STORE
         unsigned SrcReg = MI->getOperand(2).getReg();
         bool isKill = MI->getOperand(2).isKill();
+        bool isUndef = MI->getOperand(2).isUndef();
         NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
-          .addReg(SrcReg, getKillRegState(isKill))
+          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
           .addImm(0).addFrameIndex(FI);
       } else {              // COPY -> LOAD
         unsigned DstReg = MI->getOperand(0).getReg();
         bool isDead = MI->getOperand(0).isDead();
+        bool isUndef = MI->getOperand(0).isUndef();
         NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
-          .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+          .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+                  getUndefRegState(isUndef))
           .addImm(0).addFrameIndex(FI);
       }
     }
@@ -321,14 +324,17 @@ foldMemoryOperandImpl(MachineFunction &MF,
       if (Ops[0] == 0) {    // COPY -> STORE
         unsigned SrcReg = MI->getOperand(1).getReg();
         bool isKill = MI->getOperand(1).isKill();
+        bool isUndef = MI->getOperand(2).isUndef();
         NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
-          .addReg(SrcReg, getKillRegState(isKill))
+          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
           .addImm(0).addFrameIndex(FI) ;
       } else {              // COPY -> LOAD
         unsigned DstReg = MI->getOperand(0).getReg();
         bool isDead = MI->getOperand(0).isDead();
+        bool isUndef = MI->getOperand(0).isUndef();
         NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
-          .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+          .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+                  getUndefRegState(isUndef))
           .addImm(0).addFrameIndex(FI);
       }
     }
@@ -645,6 +651,7 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
                               Mips::CPURegsRegisterClass,
                               Mips::CPURegsRegisterClass);
   assert(Ok && "Couldn't assign to global base register!");
+  Ok = Ok; // Silence warning when assertions are turned off.
   RegInfo.addLiveIn(Mips::GP);
 
   MipsFI->setGlobalBaseReg(GlobalBaseReg);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index c5f117b..4675536 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -134,6 +134,6 @@ addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
                    bool Verbose, raw_ostream &Out)  {
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
-  PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+  PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 85fafad..95e5be4 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -39,7 +39,6 @@ namespace llvm {
     // linked in.
     typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                               MipsTargetMachine &tm,
-                                              CodeGenOpt::Level OptLevel,
                                               bool verbose);
     static AsmPrinterCtorFn AsmPrinterCtor;
     
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index cf0f9db..7940648 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -151,6 +151,7 @@ namespace PIC16CC {
         return STATIC_LOCAL;
  
       assert (0 && "Could not determine Symbol's tag");
+      return PREFIX_SYMBOL; // Silence warning when assertions are turned off.
     }
 
     // addPrefix - add prefix symbol to a name if there isn't one already.
@@ -331,7 +332,6 @@ namespace PIC16CC {
   FunctionPass *createPIC16ISelDag(PIC16TargetMachine &TM);
   FunctionPass *createPIC16CodePrinterPass(raw_ostream &OS, 
                                            PIC16TargetMachine &TM,
-                                           CodeGenOpt::Level OptLevel,
                                            bool Verbose);
   // Banksel optimzer pass.
   FunctionPass *createPIC16MemSelOptimizerPass();
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index ca1089b..1fc1cc1 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -113,9 +113,8 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 ///
 FunctionPass *llvm::createPIC16CodePrinterPass(raw_ostream &o,
                                                PIC16TargetMachine &tm,
-                                               CodeGenOpt::Level OptLevel,
                                                bool verbose) {
-  return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new PIC16AsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 3ec5659..cb11687 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -30,9 +30,8 @@
 namespace llvm {
   struct VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
     explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
-                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                             bool V)
-      : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) {
+                             const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V), DbgInfo(O, T) {
       PTLI = TM.getTargetLowering();
       PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
     }
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ec1db90..0d24f61 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -399,7 +399,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
    const Type *RetTy = RetVT.getTypeForMVT();
    std::pair<SDValue,SDValue> CallInfo = 
      LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
-                 false, CallingConv::C, false, Callee, Args, DAG, dl);
+                 false, 0, CallingConv::C, false, Callee, Args, DAG, dl);
 
   return CallInfo.first;
 }
@@ -841,12 +841,16 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) {
     // i.e. without any extension
     MVT MemVT = LD->getMemoryVT();
     unsigned MemBytes = MemVT.getSizeInBits() / 8;
+    // if MVT::i1 is extended to MVT::i8 then MemBytes will be zero
+    // So set it to one
+    if (MemBytes == 0) MemBytes = 1;
+    
     unsigned ExtdBytes = VT.getSizeInBits() / 8;
     Offset = DAG.getConstant(LoadOffset, MVT::i8);
 
     Tys = DAG.getVTList(MVT::i8, MVT::Other); 
     // For MemBytes generate PIC16Load with proper offset
-    for (iter=0; iter<MemBytes; ++iter) {
+    for (iter=0; iter < MemBytes; ++iter) {
       // Add the pointer offset if any
       Offset = DAG.getConstant(iter + LoadOffset, MVT::i8);
       Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Chain, PtrLo, PtrHi,
@@ -1302,7 +1306,8 @@ SDValue PIC16TargetLowering::LegalizeCALL(SDValue Op, SelectionDAG &DAG) {
    // Generate new call with all the operands legal
    return DAG.getCall(TheCall->getCallingConv(), dl,
                       TheCall->isVarArg(), TheCall->isTailCall(),
-                      TheCall->isInreg(), VTs, &Ops[0], Ops.size());
+                      TheCall->isInreg(), VTs, &Ops[0], Ops.size(),
+                      TheCall->getNumFixedArgs());
 }
 
 void PIC16TargetLowering::
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index ca9650d..b40ea12 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -145,6 +145,11 @@ namespace llvm {
     unsigned GetTmpSize() { return TmpSize; }
     void SetTmpSize(unsigned Size) { TmpSize = Size; }
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *) const {
+      // FIXME: The function never seems to be aligned.
+      return 1;
+    }
   private:
     // If the Node is a BUILD_PAIR representing a direct Address,
     // then this function will return true.
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
index 7557716..a054bdc 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -299,7 +299,7 @@ def store_indirect :
 // Direct load.
 // Input Operands are: ptrlo = GA, offset = offset, ptrhi = banksel.
 // Output: dst = W
-let mayLoad = 1 in
+let Defs = [STATUS], mayLoad = 1 in
 class MOVF_INSN<bits<6> OpCode, SDNode OpNodeSrc, SDNode Op>:
   ByteFormat<0, (outs GPR:$dst), 
              (ins i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index 4304732..77ad188 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -65,11 +65,11 @@ bool PIC16TargetMachine::addInstSelector(PassManagerBase &PM,
   return false;
 }
 
-bool PIC16TargetMachine::
-addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
-                   bool Verbose, raw_ostream &Out) {
+bool PIC16TargetMachine::addAssemblyEmitter(PassManagerBase &PM, 
+                                            CodeGenOpt::Level OptLevel,
+                                            bool Verbose, raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createPIC16CodePrinterPass(Out, *this, OptLevel, Verbose));
+  PM.add(createPIC16CodePrinterPass(Out, *this, Verbose));
   return false;
 }
 
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index c5aa6ae..7f1673c 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -56,9 +56,8 @@ namespace {
     const PPCSubtarget &Subtarget;
   public:
     explicit PPCAsmPrinter(raw_ostream &O, TargetMachine &TM,
-                           const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                           bool V)
-      : AsmPrinter(O, TM, T, OL, V),
+                           const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V),
         Subtarget(TM.getSubtarget<PPCSubtarget>()) {}
 
     virtual const char *getPassName() const {
@@ -189,8 +188,7 @@ namespace {
       if (TM.getRelocationModel() != Reloc::Static) {
         if (MO.getType() == MachineOperand::MO_GlobalAddress) {
           GlobalValue *GV = MO.getGlobal();
-          if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
-                GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+          if (GV->isDeclaration() || GV->isWeakForLinker()) {
             // Dynamically-resolved functions need a stub for the function.
             std::string Name = Mang->getValueName(GV);
             FnStubs.insert(Name);
@@ -296,9 +294,8 @@ namespace {
   class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
   public:
     explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
-                                const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                                bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V){}
+                                const TargetAsmInfo *T, bool V)
+      : PPCAsmPrinter(O, TM, T, V){}
 
     virtual const char *getPassName() const {
       return "Linux PPC Assembly Printer";
@@ -323,9 +320,8 @@ namespace {
     raw_ostream &OS;
   public:
     explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
-                                 const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                                 bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {}
+                                 const TargetAsmInfo *T, bool V)
+      : PPCAsmPrinter(O, TM, T, V), OS(O) {}
 
     virtual const char *getPassName() const {
       return "Darwin PPC Assembly Printer";
@@ -387,11 +383,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
     if (TM.getRelocationModel() != Reloc::Static) {
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
         if (GV->hasHiddenVisibility()) {
-          if (!GV->isDeclaration() && !GV->hasCommonLinkage())
-            O << Name;
-          else {
+          if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+              GV->hasAvailableExternallyLinkage()) {
             HiddenGVStubs.insert(Name);
             printSuffixedName(Name, "$non_lazy_ptr");
+          } else {
+            O << Name;
           }
         } else {
           GVStubs.insert(Name);
@@ -596,7 +593,7 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   printVisibility(CurrentFnName, F->getVisibility());
 
-  EmitAlignment(2, F);
+  EmitAlignment(MF.getAlignment(), F);
   O << CurrentFnName << ":\n";
 
   // Emit pre-function debug information.
@@ -773,7 +770,7 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   printVisibility(CurrentFnName, F->getVisibility());
 
-  EmitAlignment(F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4, F);
+  EmitAlignment(MF.getAlignment(), F);
   O << CurrentFnName << ":\n";
 
   // Emit pre-function debug information.
@@ -1119,16 +1116,13 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
 ///
 FunctionPass *llvm::createPPCAsmPrinterPass(raw_ostream &o,
                                             PPCTargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose) {
   const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
 
   if (Subtarget->isDarwin()) {
-    return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(),
-                                   OptLevel, verbose);
+    return new PPCDarwinAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
   } else {
-    return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(),
-                                  OptLevel, verbose);
+    return new PPCLinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
   }
 }
 
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 0b67aff..a6479d8 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -26,3 +26,5 @@ add_llvm_target(PowerPCCodeGen
   PPCTargetAsmInfo.cpp
   PPCTargetMachine.cpp
   )
+
+target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index c844e21..f6c3469 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -28,9 +28,8 @@ namespace llvm {
   
 FunctionPass *createPPCBranchSelectionPass();
 FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
-FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS,
-                                      PPCTargetMachine &TM,
-                                      CodeGenOpt::Level OptLevel, bool Verbose);
+FunctionPass *createPPCAsmPrinterPass(raw_ostream &OS, PPCTargetMachine &TM,
+                                      bool Verbose);
 FunctionPass *createPPCCodeEmitterPass(PPCTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
 FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 9f916f3..c7ce171 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -43,10 +43,8 @@ def CC_PPC : CallingConv<[
   CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
   
   // Common sub-targets passes FP values in F1 - F13
-  CCIfType<[f32, f64], CCIfSubtarget<"isMachoABI()",
-           CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>>,
-  // ELF32 sub-target pass FP values in F1 - F8.
-  CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+  CCIfType<[f32, f64], 
+           CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8,F9,F10,F11,F12,F13]>>,
            
   // The first 12 Vector arguments are passed in altivec registers.
   CCIfType<[v16i8, v8i16, v4i32, v4f32],
@@ -64,3 +62,86 @@ def CC_PPC : CallingConv<[
 
 */
 
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 ABI
+//===----------------------------------------------------------------------===//
+
+// _Complex arguments are never split, thus their two scalars are either
+// passed both in argument registers or both on the stack. Also _Complex
+// arguments are always passed in general purpose registers, never in
+// Floating-point registers or vector registers. Arguments which should go
+// on the stack are marked with the inreg parameter attribute.
+// Giving inreg this target-dependent (and counter-intuitive) meaning
+// simplifies things, because functions calls are not always coming from the
+// frontend but are also created implicitly e.g. for libcalls. If inreg would
+// actually mean that the argument is passed in a register, then all places
+// which create function calls/function definitions implicitly would need to
+// be aware of this fact and would need to mark arguments accordingly. With
+// inreg meaning that the argument is passed on the stack, this is not an
+// issue, except for calls which involve _Complex types.
+
+def CC_PPC_SVR4_Common : CallingConv<[
+  // The ABI requires i64 to be passed in two adjacent registers with the first
+  // register having an odd register number.
+  CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignArgRegs">>>,
+
+  // The first 8 integer arguments are passed in integer registers.
+  CCIfType<[i32], CCIf<"!ArgFlags.isInReg()",
+    CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>,
+
+  // Make sure the i64 words from a long double are either both passed in
+  // registers or both passed on the stack.
+  CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC_SVR4_Custom_AlignFPArgRegs">>>,
+  
+  // FP values are passed in F1 - F8.
+  CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+  // Split arguments have an alignment of 8 bytes on the stack.
+  CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+  
+  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  
+  // Floats are stored in double precision format, thus they have the same
+  // alignment and size as doubles.
+  CCIfType<[f32,f64], CCAssignToStack<8, 8>>,  
+
+  // Vectors get 16-byte stack slots that are 16-byte aligned.
+  CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention puts vector arguments always on the stack. It is used
+// to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC_SVR4_VarArg : CallingConv<[
+  CCDelegateTo<CC_PPC_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC_SVR4_VarArg, this calling convention first tries to put
+// vector arguments in vector registers before putting them on the stack.
+def CC_PPC_SVR4 : CallingConv<[
+  // The first 12 Vector arguments are passed in AltiVec registers.
+  CCIfType<[v16i8, v8i16, v4i32, v4f32],
+           CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+           
+  CCDelegateTo<CC_PPC_SVR4_Common>
+]>;  
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the callers stack frame.
+//
+// Still, the address of the aggregate copy in the callers stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC_SVR4 calling convention.
+//
+// The only purpose of CC_PPC_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+ 
+def CC_PPC_SVR4_ByVal : CallingConv<[
+  CCIfByVal<CCPassByVal<4, 4>>,
+  
+  CCCustom<"CC_PPC_SVR4_Custom_Dummy">
+]>;
+
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index aa3dce1..cd6018d 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -163,8 +163,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
   } else if (MO.isGlobal() || MO.isSymbol() ||
              MO.isCPI() || MO.isJTI()) {
     unsigned Reloc = 0;
-    if (MI.getOpcode() == PPC::BL_Macho || MI.getOpcode() == PPC::BL8_Macho ||
-        MI.getOpcode() == PPC::BL_ELF || MI.getOpcode() == PPC::BL8_ELF ||
+    if (MI.getOpcode() == PPC::BL_Darwin || MI.getOpcode() == PPC::BL8_Darwin ||
+        MI.getOpcode() == PPC::BL_SVR4 || MI.getOpcode() == PPC::BL8_ELF ||
         MI.getOpcode() == PPC::TAILB || MI.getOpcode() == PPC::TAILB8)
       Reloc = PPC::reloc_pcrel_bx;
     else {
@@ -246,9 +246,9 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
   } else if (MO.isMBB()) {
     unsigned Reloc = 0;
     unsigned Opcode = MI.getOpcode();
-    if (Opcode == PPC::B || Opcode == PPC::BL_Macho ||
-        Opcode == PPC::BLA_Macho || Opcode == PPC::BL_ELF || 
-        Opcode == PPC::BLA_ELF)
+    if (Opcode == PPC::B || Opcode == PPC::BL_Darwin ||
+        Opcode == PPC::BLA_Darwin|| Opcode == PPC::BL_SVR4 ||
+        Opcode == PPC::BLA_SVR4)
       Reloc = PPC::reloc_pcrel_bx;
     else // BCC instruction
       Reloc = PPC::reloc_pcrel_bcx;
diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h
index 1b5893d..770a560 100644
--- a/lib/Target/PowerPC/PPCFrameInfo.h
+++ b/lib/Target/PowerPC/PPCFrameInfo.h
@@ -14,8 +14,10 @@
 #define POWERPC_FRAMEINFO_H
 
 #include "PPC.h"
+#include "PPCSubtarget.h"
 #include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace llvm {
 
@@ -29,63 +31,153 @@ public:
 
   /// getReturnSaveOffset - Return the previous frame offset to save the
   /// return address.
-  static unsigned getReturnSaveOffset(bool LP64, bool isMacho) {
-    if (isMacho)
+  static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) {
+    if (isDarwinABI)
       return LP64 ? 16 : 8;
-    // For ELF 32 ABI:
+    // SVR4 ABI:
     return 4;
   }
 
   /// getFramePointerSaveOffset - Return the previous frame offset to save the
   /// frame pointer.
-  static unsigned getFramePointerSaveOffset(bool LP64, bool isMacho) {
-    // For MachO ABI:
+  static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) {
+    // For the Darwin ABI:
     // Use the TOC save slot in the PowerPC linkage area for saving the frame
     // pointer (if needed.)  LLVM does not generate code that uses the TOC (R2
     // is treated as a caller saved register.)
-    if (isMacho)
+    if (isDarwinABI)
       return LP64 ? 40 : 20;
     
-    // For ELF 32 ABI:
+    // SVR4 ABI:
     // Save it right before the link register
     return -4U;
   }
   
   /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
   ///
-  static unsigned getLinkageSize(bool LP64, bool isMacho) {
-    if (isMacho)
+  static unsigned getLinkageSize(bool LP64, bool isDarwinABI) {
+    if (isDarwinABI)
       return 6 * (LP64 ? 8 : 4);
     
-    // For ELF 32 ABI:
+    // SVR4 ABI:
     return 8;
   }
 
   /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
   /// argument area.
-  static unsigned getMinCallArgumentsSize(bool LP64, bool isMacho) {
-    // For Macho ABI:
+  static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) {
+    // For the Darwin ABI:
     // The prolog code of the callee may store up to 8 GPR argument registers to
     // the stack, allowing va_start to index over them in memory if its varargs.
     // Because we cannot tell if this is needed on the caller side, we have to
     // conservatively assume that it is needed.  As such, make sure we have at
     // least enough stack space for the caller to store the 8 GPRs.
-    if (isMacho)
+    if (isDarwinABI)
       return 8 * (LP64 ? 8 : 4);
     
-    // For ELF 32 ABI:
+    // SVR4 ABI:
     // There is no default stack allocated for the 8 first GPR arguments.
     return 0;
   }
 
   /// getMinCallFrameSize - Return the minimum size a call frame can be using
   /// the PowerPC ABI.
-  static unsigned getMinCallFrameSize(bool LP64, bool isMacho) {
+  static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) {
     // The call frame needs to be at least big enough for linkage and 8 args.
-    return getLinkageSize(LP64, isMacho) +
-           getMinCallArgumentsSize(LP64, isMacho);
+    return getLinkageSize(LP64, isDarwinABI) +
+           getMinCallArgumentsSize(LP64, isDarwinABI);
+  }
+
+  // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+  const std::pair<unsigned, int> *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+    // Early exit if not using the SVR4 ABI.
+    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+      NumEntries = 0;
+      return 0;
+    }
+    
+    static const std::pair<unsigned, int> Offsets[] = {
+      // Floating-point register save area offsets.
+      std::pair<unsigned, int>(PPC::F31, -8),
+      std::pair<unsigned, int>(PPC::F30, -16),
+      std::pair<unsigned, int>(PPC::F29, -24),
+      std::pair<unsigned, int>(PPC::F28, -32),
+      std::pair<unsigned, int>(PPC::F27, -40),
+      std::pair<unsigned, int>(PPC::F26, -48),
+      std::pair<unsigned, int>(PPC::F25, -56),
+      std::pair<unsigned, int>(PPC::F24, -64),
+      std::pair<unsigned, int>(PPC::F23, -72),
+      std::pair<unsigned, int>(PPC::F22, -80),
+      std::pair<unsigned, int>(PPC::F21, -88),
+      std::pair<unsigned, int>(PPC::F20, -96),
+      std::pair<unsigned, int>(PPC::F19, -104),
+      std::pair<unsigned, int>(PPC::F18, -112),
+      std::pair<unsigned, int>(PPC::F17, -120),
+      std::pair<unsigned, int>(PPC::F16, -128),
+      std::pair<unsigned, int>(PPC::F15, -136),
+      std::pair<unsigned, int>(PPC::F14, -144),
+        
+      // General register save area offsets.
+      std::pair<unsigned, int>(PPC::R31, -4),
+      std::pair<unsigned, int>(PPC::R30, -8),
+      std::pair<unsigned, int>(PPC::R29, -12),
+      std::pair<unsigned, int>(PPC::R28, -16),
+      std::pair<unsigned, int>(PPC::R27, -20),
+      std::pair<unsigned, int>(PPC::R26, -24),
+      std::pair<unsigned, int>(PPC::R25, -28),
+      std::pair<unsigned, int>(PPC::R24, -32),
+      std::pair<unsigned, int>(PPC::R23, -36),
+      std::pair<unsigned, int>(PPC::R22, -40),
+      std::pair<unsigned, int>(PPC::R21, -44),
+      std::pair<unsigned, int>(PPC::R20, -48),
+      std::pair<unsigned, int>(PPC::R19, -52),
+      std::pair<unsigned, int>(PPC::R18, -56),
+      std::pair<unsigned, int>(PPC::R17, -60),
+      std::pair<unsigned, int>(PPC::R16, -64),
+      std::pair<unsigned, int>(PPC::R15, -68),
+      std::pair<unsigned, int>(PPC::R14, -72),
+
+      // CR save area offset.
+      // FIXME SVR4: Disable CR save area for now.
+//      std::pair<unsigned, int>(PPC::CR2, -4),
+//      std::pair<unsigned, int>(PPC::CR3, -4),
+//      std::pair<unsigned, int>(PPC::CR4, -4),
+//      std::pair<unsigned, int>(PPC::CR2LT, -4),
+//      std::pair<unsigned, int>(PPC::CR2GT, -4),
+//      std::pair<unsigned, int>(PPC::CR2EQ, -4),
+//      std::pair<unsigned, int>(PPC::CR2UN, -4),
+//      std::pair<unsigned, int>(PPC::CR3LT, -4),
+//      std::pair<unsigned, int>(PPC::CR3GT, -4),
+//      std::pair<unsigned, int>(PPC::CR3EQ, -4),
+//      std::pair<unsigned, int>(PPC::CR3UN, -4),
+//      std::pair<unsigned, int>(PPC::CR4LT, -4),
+//      std::pair<unsigned, int>(PPC::CR4GT, -4),
+//      std::pair<unsigned, int>(PPC::CR4EQ, -4),
+//      std::pair<unsigned, int>(PPC::CR4UN, -4),
+
+      // VRSAVE save area offset.
+      std::pair<unsigned, int>(PPC::VRSAVE, -4),
+      
+      // Vector register save area
+      std::pair<unsigned, int>(PPC::V31, -16),
+      std::pair<unsigned, int>(PPC::V30, -32),
+      std::pair<unsigned, int>(PPC::V29, -48),
+      std::pair<unsigned, int>(PPC::V28, -64),
+      std::pair<unsigned, int>(PPC::V27, -80),
+      std::pair<unsigned, int>(PPC::V26, -96),
+      std::pair<unsigned, int>(PPC::V25, -112),
+      std::pair<unsigned, int>(PPC::V24, -128),
+      std::pair<unsigned, int>(PPC::V23, -144),
+      std::pair<unsigned, int>(PPC::V22, -160),
+      std::pair<unsigned, int>(PPC::V21, -176),
+      std::pair<unsigned, int>(PPC::V20, -192)
+    };
+    
+    NumEntries = array_lengthof(Offsets);
+    
+    return Offsets;
   }
-  
 };
 
 } // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index e7658fc..ec3e757 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -159,7 +159,7 @@ getHazardType(SUnit *SU) {
   }
   
   // Do not allow MTCTR and BCTRL to be in the same dispatch group.
-  if (HasCTRSet && (Opcode == PPC::BCTRL_Macho || Opcode == PPC::BCTRL_ELF))
+  if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4))
     return NoopHazard;
   
   // If this is a load following a store, make sure it's not to the same or
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 87f8fb0b4..1c6b287 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -35,6 +35,21 @@
 #include "llvm/DerivedTypes.h"
 using namespace llvm;
 
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                     CCValAssign::LocInfo &LocInfo,
+                                     ISD::ArgFlagsTy &ArgFlags,
+                                     CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                            MVT &LocVT,
+                                            CCValAssign::LocInfo &LocInfo,
+                                            ISD::ArgFlagsTy &ArgFlags,
+                                            CCState &State);
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
+                                              CCValAssign::LocInfo &LocInfo,
+                                              ISD::ArgFlagsTy &ArgFlags,
+                                              CCState &State);
+
 static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
 cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                      cl::Hidden);
@@ -190,8 +205,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 
-  // VAARG is custom lowered with ELF 32 ABI
-  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
+  // VAARG is custom lowered with the SVR4 ABI
+  if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI())
     setOperationAction(ISD::VAARG, MVT::Other, Custom);
   else
     setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -380,7 +395,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
   // Darwin passes everything on 4 byte boundary.
   if (TM.getSubtarget<PPCSubtarget>().isDarwin())
     return 4;
-  // FIXME Elf TBD
+  // FIXME SVR4 TBD
   return 4;
 }
 
@@ -404,11 +419,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::SHL:             return "PPCISD::SHL";
   case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
   case PPCISD::STD_32:          return "PPCISD::STD_32";
-  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
-  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
+  case PPCISD::CALL_SVR4:       return "PPCISD::CALL_SVR4";
+  case PPCISD::CALL_Darwin:     return "PPCISD::CALL_Darwin";
   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
-  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
-  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
+  case PPCISD::BCTRL_Darwin:    return "PPCISD::BCTRL_Darwin";
+  case PPCISD::BCTRL_SVR4:      return "PPCISD::BCTRL_SVR4";
   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
   case PPCISD::MFCR:            return "PPCISD::MFCR";
   case PPCISD::VCMP:            return "PPCISD::VCMP";
@@ -428,11 +443,17 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
-
 MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
   return MVT::i32;
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned PPCTargetLowering::getFunctionAlignment(const Function *F) const {
+  if (getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin())
+    return F->hasFnAttr(Attribute::OptimizeForSize) ? 2 : 4;
+  else
+    return 2;
+}
 
 //===----------------------------------------------------------------------===//
 // Node matching predicates, for use by the tblgen matching code.
@@ -1228,7 +1249,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                               unsigned VarArgsNumFPR,
                               const PPCSubtarget &Subtarget) {
 
-  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
+  assert(0 && "VAARG not yet implemented for the SVR4 ABI!");
   return SDValue(); // Not reached
 }
 
@@ -1261,7 +1282,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
   std::pair<SDValue, SDValue> CallResult =
     LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
-                false, false, CallingConv::C, false,
+                false, false, 0, CallingConv::C, false,
                 DAG.getExternalSymbol("__trampoline_setup", PtrVT),
                 Args, DAG, dl);
 
@@ -1279,7 +1300,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) {
   DebugLoc dl = Op.getDebugLoc();
 
-  if (Subtarget.isMachoABI()) {
+  if (Subtarget.isDarwinABI()) {
     // vastart just stores the address of the VarArgsFrameIndex slot into the
     // memory location argument.
     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1288,7 +1309,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
   }
 
-  // For ELF 32 ABI we follow the layout of the va_list struct.
+  // For the SVR4 ABI we follow the layout of the va_list struct.
   // We suppose the given va_list is already allocated.
   //
   // typedef struct {
@@ -1313,8 +1334,8 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
   // } va_list[1];
 
 
-  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
-  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);
+  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32);
+  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32);
 
 
   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1334,15 +1355,15 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
 
   // Store first byte : number of int regs
-  SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
-                                      Op.getOperand(1), SV, 0);
+  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+                                         Op.getOperand(1), SV, 0, MVT::i8);
   uint64_t nextOffset = FPROffset;
   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                   ConstFPROffset);
 
   // Store second byte : number of float regs
   SDValue secondStore =
-    DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
+    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset, MVT::i8);
   nextOffset += StackOffset;
   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
 
@@ -1359,10 +1380,71 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
 
 #include "PPCGenCallingConv.inc"
 
+static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                     CCValAssign::LocInfo &LocInfo,
+                                     ISD::ArgFlagsTy &ArgFlags,
+                                     CCState &State) {
+  return true;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                            MVT &LocVT,
+                                            CCValAssign::LocInfo &LocInfo,
+                                            ISD::ArgFlagsTy &ArgFlags,
+                                            CCState &State) {
+  static const unsigned ArgRegs[] = {
+    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+  };
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+  // Skip one register if the first unallocated register has an even register
+  // number and there are still argument registers available which have not been
+  // allocated yet. RegNum is actually an index into ArgRegs, which means we
+  // need to skip a register if RegNum is odd.
+  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+  
+  // Always return false here, as this function only makes sure that the first
+  // unallocated register has an odd register number and does not actually
+  // allocate a register for the current argument.
+  return false;
+}
+
+static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                              MVT &LocVT,
+                                              CCValAssign::LocInfo &LocInfo,
+                                              ISD::ArgFlagsTy &ArgFlags,
+                                              CCState &State) {
+  static const unsigned ArgRegs[] = {
+    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+    PPC::F8
+  };
+
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  
+  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+  // If there is only one Floating-point register left we need to put both f64
+  // values of a split ppc_fp128 value on the stack.
+  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+    State.AllocateReg(ArgRegs[RegNum]);
+  }
+  
+  // Always return false here, as this function only makes sure that the two f64
+  // values a ppc_fp128 value is split into are both passed in registers or both
+  // passed on the stack and does not actually allocate a register for the
+  // current argument.
+  return false;
+}
+
 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
 /// depending on which subtarget is selected.
 static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
-  if (Subtarget.isMachoABI()) {
+  if (Subtarget.isDarwinABI()) {
     static const unsigned FPR[] = {
       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
       PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
@@ -1381,9 +1463,9 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
 /// the stack.
 static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
-                                       bool isVarArg, unsigned PtrByteSize) {
+                                       unsigned PtrByteSize) {
   MVT ArgVT = Arg.getValueType();
-  unsigned ArgSize =ArgVT.getSizeInBits()/8;
+  unsigned ArgSize = ArgVT.getSizeInBits()/8;
   if (Flags.isByVal())
     ArgSize = Flags.getByValSize();
   ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -1392,18 +1474,248 @@ static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
 }
 
 SDValue
-PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
-                                         SelectionDAG &DAG,
-                                         int &VarArgsFrameIndex,
-                                         int &VarArgsStackOffset,
-                                         unsigned &VarArgsNumGPR,
-                                         unsigned &VarArgsNumFPR,
-                                         const PPCSubtarget &Subtarget) {
+PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4(SDValue Op,
+                                              SelectionDAG &DAG,
+                                              int &VarArgsFrameIndex,
+                                              int &VarArgsStackOffset,
+                                              unsigned &VarArgsNumGPR,
+                                              unsigned &VarArgsNumFPR,
+                                              const PPCSubtarget &Subtarget) {
+  // SVR4 ABI Stack Frame Layout:
+  //              +-----------------------------------+
+  //        +-->  |            Back chain             |
+  //        |     +-----------------------------------+
+  //        |     | Floating-point register save area |
+  //        |     +-----------------------------------+
+  //        |     |    General register save area     |
+  //        |     +-----------------------------------+
+  //        |     |          CR save word             |
+  //        |     +-----------------------------------+
+  //        |     |         VRSAVE save word          |
+  //        |     +-----------------------------------+
+  //        |     |         Alignment padding         |
+  //        |     +-----------------------------------+
+  //        |     |     Vector register save area     |
+  //        |     +-----------------------------------+
+  //        |     |       Local variable space        |
+  //        |     +-----------------------------------+
+  //        |     |        Parameter list area        |
+  //        |     +-----------------------------------+
+  //        |     |           LR save word            |
+  //        |     +-----------------------------------+
+  // SP-->  +---  |            Back chain             |
+  //              +-----------------------------------+
+  //
+  // Specifications:
+  //   System V Application Binary Interface PowerPC Processor Supplement
+  //   AltiVec Technology Programming Interface Manual
+  
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  SmallVector<SDValue, 8> ArgValues;
+  SDValue Root = Op.getOperand(0);
+  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
+  DebugLoc dl = Op.getDebugLoc();
+
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  // Potential tail calls could cause overwriting of argument stack slots.
+  unsigned CC = MF.getFunction()->getCallingConv();
+  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
+  unsigned PtrByteSize = 4;
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  // Reserve space for the linkage area on the stack.
+  CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+
+  CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4);
+  
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    
+    // Arguments stored in registers.
+    if (VA.isRegLoc()) {
+      TargetRegisterClass *RC;
+      MVT ValVT = VA.getValVT();
+      
+      switch (ValVT.getSimpleVT()) {
+        default:
+          assert(0 && "ValVT not supported by FORMAL_ARGUMENTS Lowering");
+        case MVT::i32:
+          RC = PPC::GPRCRegisterClass;
+          break;
+        case MVT::f32:
+          RC = PPC::F4RCRegisterClass;
+          break;
+        case MVT::f64:
+          RC = PPC::F8RCRegisterClass;
+          break;
+        case MVT::v16i8:
+        case MVT::v8i16:
+        case MVT::v4i32:
+        case MVT::v4f32:
+          RC = PPC::VRRCRegisterClass;
+          break;
+      }
+      
+      // Transform the arguments stored in physical registers into virtual ones.
+      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, ValVT);
+
+      ArgValues.push_back(ArgValue);
+    } else {
+      // Argument stored in memory.
+      assert(VA.isMemLoc());
+
+      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+                                      isImmutable);
+
+      // Create load nodes to retrieve arguments from the stack.
+      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+      ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0));
+    }
+  }
+
+  // Assign locations to all of the incoming aggregate by value arguments.
+  // Aggregates passed by value are stored in the local variable space of the
+  // caller's stack frame, right above the parameter list area.
+  SmallVector<CCValAssign, 16> ByValArgLocs;
+  CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
+
+  // Reserve stack space for the allocations in CCInfo.
+  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+  CCByValInfo.AnalyzeFormalArguments(Op.getNode(), CC_PPC_SVR4_ByVal);
+
+  // Area that is at least reserved in the caller of this function.
+  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+  
+  // Set the size that is at least reserved in caller of this function.  Tail
+  // call optimized function's reserved stack space needs to be aligned so that
+  // taking the difference between two stack areas will result in an aligned
+  // stack.
+  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+
+  MinReservedArea =
+    std::max(MinReservedArea,
+             PPCFrameInfo::getMinCallFrameSize(false, false));
+  
+  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
+    getStackAlignment();
+  unsigned AlignMask = TargetAlign-1;
+  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
+  
+  FI->setMinReservedArea(MinReservedArea);
+
+  SmallVector<SDValue, 8> MemOps;
+  
+  // If the function takes variable number of arguments, make a frame index for
+  // the start of the first vararg value... for expansion of llvm.va_start.
+  if (isVarArg) {
+    static const unsigned GPArgRegs[] = {
+      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+    };
+    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+    static const unsigned FPArgRegs[] = {
+      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+      PPC::F8
+    };
+    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+    VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs);
+    VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs);
+
+    // Make room for NumGPArgRegs and NumFPArgRegs.
+    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
+
+    VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+                                                CCInfo.getNextStackOffset());
+
+    VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8);
+    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+    // The fixed integer arguments of a variadic function are
+    // stored to the VarArgsFrameIndex on the stack.
+    unsigned GPRIndex = 0;
+    for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) {
+      SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT);
+      SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+      MemOps.push_back(Store);
+      // Increment the address by four for the next argument to store
+      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+
+    // If this function is vararg, store any remaining integer argument regs
+    // to their spots on the stack so that they may be loaded by deferencing the
+    // result of va_next.
+    for (; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+      unsigned VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+      SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+      MemOps.push_back(Store);
+      // Increment the address by four for the next argument to store
+      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+
+    // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is
+    // set.
+    
+    // The double arguments are stored to the VarArgsFrameIndex
+    // on the stack.
+    unsigned FPRIndex = 0;
+    for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) {
+      SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64);
+      SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
+      MemOps.push_back(Store);
+      // Increment the address by eight for the next argument to store
+      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+                                         PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+
+    for (; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+      unsigned VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+      SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
+      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
+      MemOps.push_back(Store);
+      // Increment the address by eight for the next argument to store
+      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
+                                         PtrVT);
+      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+    }
+  }
+
+  if (!MemOps.empty())
+    Root = DAG.getNode(ISD::TokenFactor, dl,
+                       MVT::Other, &MemOps[0], MemOps.size());
+
+  
+  ArgValues.push_back(Root);
+
+  // Return the new list of results.
+  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
+                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
+}
+
+SDValue
+PPCTargetLowering::LowerFORMAL_ARGUMENTS_Darwin(SDValue Op,
+                                                SelectionDAG &DAG,
+                                                int &VarArgsFrameIndex,
+                                                const PPCSubtarget &Subtarget) {
   // TODO: add description of PPC stack frame format, or at least some docs.
   //
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineRegisterInfo &RegInfo = MF.getRegInfo();
   SmallVector<SDValue, 8> ArgValues;
   SDValue Root = Op.getOperand(0);
   bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
@@ -1411,14 +1723,12 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
 
   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = PtrVT == MVT::i64;
-  bool isMachoABI = Subtarget.isMachoABI();
-  bool isELF32_ABI = Subtarget.isELF32_ABI();
   // Potential tail calls could cause overwriting of argument stack slots.
   unsigned CC = MF.getFunction()->getCallingConv();
   bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
   unsigned PtrByteSize = isPPC64 ? 8 : 4;
 
-  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
   // Area that is at least reserved in caller of this function.
   unsigned MinReservedArea = ArgOffset;
 
@@ -1439,7 +1749,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
   };
 
   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
-  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+  const unsigned Num_FPR_Regs = 13;
   const unsigned Num_VR_Regs  = array_lengthof( VR);
 
   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
@@ -1453,8 +1763,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
   // that out...for the pathological case, compute VecArgOffset as the
   // start of the vector parameter area.  Computing VecArgOffset is the
   // entire point of the following loop.
-  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
-  // to handle Elf here.
   unsigned VecArgOffset = ArgOffset;
   if (!isVarArg && !isPPC64) {
     for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
@@ -1500,10 +1808,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
   // Add DAG nodes to load the arguments or copy them out of registers.  On
   // entry to a function on PPC, the arguments start after the linkage area,
   // although the first ones are often in registers.
-  //
-  // In the ELF 32 ABI, GPRs and stack are double word align: an argument
-  // represented with two words (long long or double) must be copied to an
-  // even GPR_idx value or to an even ArgOffset value.
 
   SmallVector<SDValue, 8> MemOps;
   unsigned nAltivecParamsAtEnd = 0;
@@ -1516,8 +1820,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
     unsigned ArgSize = ObjSize;
     ISD::ArgFlagsTy Flags =
       cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
-    // See if next argument requires stack alignment in ELF
-    bool Align = Flags.isSplit();
 
     unsigned CurArgOffset = ArgOffset;
 
@@ -1528,25 +1830,20 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
         MinReservedArea = ((MinReservedArea+15)/16)*16;
         MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                   Flags,
-                                                  isVarArg,
                                                   PtrByteSize);
       } else  nAltivecParamsAtEnd++;
     } else
       // Calculate min reserved area.
       MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                 Flags,
-                                                isVarArg,
                                                 PtrByteSize);
 
-    // FIXME alignment for ELF may not be right
     // FIXME the codegen can be much improved in some cases.
     // We do not have to keep everything in memory.
     if (Flags.isByVal()) {
       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
       ObjSize = Flags.getByValSize();
       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
-      // Double word align in ELF
-      if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
       // Objects of size 1 and 2 are right justified, everything else is
       // left justified.  This means the memory address is adjusted forwards.
       if (ObjSize==1 || ObjSize==2) {
@@ -1558,17 +1855,16 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
       ArgValues.push_back(FIN);
       if (ObjSize==1 || ObjSize==2) {
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
-          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
           MemOps.push_back(Store);
           ++GPR_idx;
-          if (isMachoABI) ArgOffset += PtrByteSize;
-        } else {
-          ArgOffset += PtrByteSize;
         }
+        
+        ArgOffset += PtrByteSize;
+        
         continue;
       }
       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
@@ -1576,15 +1872,14 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
         // to memory.  ArgVal will be address of the beginning of
         // the object.
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
-          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
           SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
           MemOps.push_back(Store);
           ++GPR_idx;
-          if (isMachoABI) ArgOffset += PtrByteSize;
+          ArgOffset += PtrByteSize;
         } else {
           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
           break;
@@ -1597,30 +1892,22 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
     default: assert(0 && "Unhandled argument type!");
     case MVT::i32:
       if (!isPPC64) {
-        // Double word align in ELF
-        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
-
         if (GPR_idx != Num_GPR_Regs) {
-          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
-          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
           ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
           ++GPR_idx;
         } else {
           needsLoad = true;
           ArgSize = PtrByteSize;
         }
-        // Stack align in ELF
-        if (needsLoad && Align && isELF32_ABI)
-          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
-        // All int arguments reserve stack space in Macho ABI.
-        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
+        // All int arguments reserve stack space in the Darwin ABI.
+        ArgOffset += PtrByteSize;
         break;
       }
       // FALLTHROUGH
     case MVT::i64:  // PPC64
       if (GPR_idx != Num_GPR_Regs) {
-        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
-        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
+        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
 
         if (ObjectVT == MVT::i32) {
@@ -1641,37 +1928,35 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
         needsLoad = true;
         ArgSize = PtrByteSize;
       }
-      // All int arguments reserve stack space in Macho ABI.
-      if (isMachoABI || needsLoad) ArgOffset += 8;
+      // All int arguments reserve stack space in the Darwin ABI.
+      ArgOffset += 8;
       break;
 
     case MVT::f32:
     case MVT::f64:
       // Every 4 bytes of argument space consumes one of the GPRs available for
       // argument passing.
-      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
+      if (GPR_idx != Num_GPR_Regs) {
         ++GPR_idx;
         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
           ++GPR_idx;
       }
       if (FPR_idx != Num_FPR_Regs) {
         unsigned VReg;
+
         if (ObjectVT == MVT::f32)
-          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
         else
-          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
-        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
         ++FPR_idx;
       } else {
         needsLoad = true;
       }
 
-      // Stack align in ELF
-      if (needsLoad && Align && isELF32_ABI)
-        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
-      // All FP arguments reserve stack space in Macho ABI.
-      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
+      // All FP arguments reserve stack space in the Darwin ABI.
+      ArgOffset += isPPC64 ? 8 : ObjSize;
       break;
     case MVT::v4f32:
     case MVT::v4i32:
@@ -1680,8 +1965,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
       // Note that vector arguments in registers don't reserve stack space,
       // except in varargs functions.
       if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
-        RegInfo.addLiveIn(VR[VR_idx], VReg);
+        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
         ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
         if (isVarArg) {
           while ((ArgOffset % 16) != 0) {
@@ -1734,7 +2018,7 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
   }
   MinReservedArea =
     std::max(MinReservedArea,
-             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+             PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
   unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
     getStackAlignment();
   unsigned AlignMask = TargetAlign-1;
@@ -1744,53 +2028,23 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
   // If the function takes variable number of arguments, make a frame index for
   // the start of the first vararg value... for expansion of llvm.va_start.
   if (isVarArg) {
-
-    int depth;
-    if (isELF32_ABI) {
-      VarArgsNumGPR = GPR_idx;
-      VarArgsNumFPR = FPR_idx;
-
-      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
-      // pointer.
-      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
-                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
-                PtrVT.getSizeInBits()/8);
-
-      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
-                                                  ArgOffset);
-
-    }
-    else
-      depth = ArgOffset;
+    int Depth = ArgOffset;
 
     VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
-                                               depth);
+                                               Depth);
     SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
 
-    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
-    // stored to the VarArgsFrameIndex on the stack.
-    if (isELF32_ABI) {
-      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
-        SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
-        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
-        MemOps.push_back(Store);
-        // Increment the address by four for the next argument to store
-        SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
-        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-      }
-    }
-
     // If this function is vararg, store any remaining integer argument regs
     // to their spots on the stack so that they may be loaded by deferencing the
     // result of va_next.
     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
       unsigned VReg;
+      
       if (isPPC64)
-        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
       else
-        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
+        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
 
-      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
       SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
       MemOps.push_back(Store);
@@ -1798,34 +2052,6 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
     }
-
-    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
-    // on the stack.
-    if (isELF32_ABI) {
-      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
-        SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
-        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
-        MemOps.push_back(Store);
-        // Increment the address by eight for the next argument to store
-        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
-                                           PtrVT);
-        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-      }
-
-      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
-        unsigned VReg;
-        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
-
-        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
-        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
-        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
-        MemOps.push_back(Store);
-        // Increment the address by eight for the next argument to store
-        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
-                                           PtrVT);
-        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
-      }
-    }
   }
 
   if (!MemOps.empty())
@@ -1840,11 +2066,10 @@ PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
 }
 
 /// CalculateParameterAndLinkageAreaSize - Get the size of the paramter plus
-/// linkage area.
+/// linkage area for the Darwin ABI.
 static unsigned
 CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                      bool isPPC64,
-                                     bool isMachoABI,
                                      bool isVarArg,
                                      unsigned CC,
                                      CallSDNode *TheCall,
@@ -1852,7 +2077,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   // Count how many bytes are to be pushed on the stack, including the linkage
   // area, and parameter passing area.  We start with 24/48 bytes, which is
   // prereserved space for [SP][CR][LR][3 x unused].
-  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, true);
   unsigned NumOps = TheCall->getNumArgs();
   unsigned PtrByteSize = isPPC64 ? 8 : 4;
 
@@ -1879,7 +2104,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
       NumBytes = ((NumBytes+15)/16)*16;
     }
-    NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
+    NumBytes += CalculateStackSlotSize(Arg, Flags, PtrByteSize);
   }
 
    // Allow for Altivec parameters at the end, if needed.
@@ -1894,7 +2119,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
   // conservatively assume that it is needed.  As such, make sure we have at
   // least enough stack space for the caller to store the 8 GPRs.
   NumBytes = std::max(NumBytes,
-                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
+                      PPCFrameInfo::getMinCallFrameSize(isPPC64, true));
 
   // Tail call needs the stack to be aligned.
   if (CC==CallingConv::Fast && PerformTailCallOpt) {
@@ -2017,26 +2242,30 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                                SDValue OldFP,
                                                int SPDiff,
                                                bool isPPC64,
-                                               bool isMachoABI,
+                                               bool isDarwinABI,
                                                DebugLoc dl) {
   if (SPDiff) {
     // Calculate the new stack slot for the return address.
     int SlotSize = isPPC64 ? 8 : 4;
     int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
-                                                                   isMachoABI);
+                                                                   isDarwinABI);
     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                           NewRetAddrLoc);
-    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
-                                                                    isMachoABI);
-    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
-
     MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                          PseudoSourceValue::getFixedStack(NewRetAddr), 0);
-    SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
-    Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
-                         PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+
+    // When using the SVR4 ABI there is no need to move the FP stack slot
+    // as the FP is never overwritten.
+    if (isDarwinABI) {
+      int NewFPLoc =
+        SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+                           PseudoSourceValue::getFixedStack(NewFPIdx), 0);
+    }
   }
   return Chain;
 }
@@ -2067,6 +2296,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                         SDValue Chain,
                                                         SDValue &LROpOut,
                                                         SDValue &FPOpOut,
+                                                        bool isDarwinABI,
                                                         DebugLoc dl) {
   if (SPDiff) {
     // Load the LR and FP stack slot for later adjusting.
@@ -2074,9 +2304,14 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
     LROpOut = getReturnAddrFrameIndex(DAG);
     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
     Chain = SDValue(LROpOut.getNode(), 1);
-    FPOpOut = getFramePointerFrameIndex(DAG);
-    FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
-    Chain = SDValue(FPOpOut.getNode(), 1);
+    
+    // When using the SVR4 ABI there is no need to load the FP stack slot
+    // as the FP is never overwritten.
+    if (isDarwinABI) {
+      FPOpOut = getFramePointerFrameIndex(DAG);
+      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
+      Chain = SDValue(FPOpOut.getNode(), 1);
+    }
   }
   return Chain;
 }
@@ -2090,8 +2325,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
 static SDValue
 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
-                          unsigned Size, DebugLoc dl) {
-  SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+                          DebugLoc dl) {
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                        false, NULL, 0, NULL, 0);
 }
@@ -2122,21 +2357,387 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                                   TailCallArguments);
 }
 
-SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
-                                       const PPCSubtarget &Subtarget,
-                                       TargetMachine &TM) {
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
+                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+  // might overwrite each other in case of tail call optimization.
+  SmallVector<SDValue, 8> MemOpChains2;
+  // Do not flag preceeding copytoreg stuff together with the following stuff.
+  InFlag = SDValue();
+  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+                                    MemOpChains2, dl);
+  if (!MemOpChains2.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains2[0], MemOpChains2.size());
+
+  // Store the return address to the appropriate stack slot.
+  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+                                        isPPC64, isDarwinABI, dl);
+
+  // Emit callseq_end just before tailcall node.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(0, true), InFlag);
+  InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+                     SmallVector<SDValue, 8> &Ops, std::vector<MVT> &NodeTys,
+                     bool isSVR4ABI) {
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  NodeTys.push_back(MVT::Other);   // Returns a chain
+  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
+
+  unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
+  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+    // If this is an absolute destination address, use the munged value.
+    Callee = SDValue(Dest, 0);
+  else {
+    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
+    // to do the call, we can't use PPCISD::CALL.
+    SDValue MTCTROps[] = {Chain, Callee, InFlag};
+    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
+                        2 + (InFlag.getNode() != 0));
+    InFlag = Chain.getValue(1);
+
+    NodeTys.clear();
+    NodeTys.push_back(MVT::Other);
+    NodeTys.push_back(MVT::Flag);
+    Ops.push_back(Chain);
+    CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin;
+    Callee.setNode(0);
+    // Add CTR register as callee so a bctr can be emitted later.
+    if (isTailCall)
+      Ops.push_back(DAG.getRegister(PPC::CTR, PtrVT));
+  }
+
+  // If this is a direct call, pass the chain and the callee.
+  if (Callee.getNode()) {
+    Ops.push_back(Chain);
+    Ops.push_back(Callee);
+  }
+  // If this is a tail call add stack pointer delta.
+  if (isTailCall)
+    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  return CallOpc;
+}
+
+static SDValue LowerCallReturn(SDValue Op, SelectionDAG &DAG, TargetMachine &TM,
+                               CallSDNode *TheCall, SDValue Chain,
+                               SDValue InFlag) {
+  bool isVarArg = TheCall->isVarArg();
+  DebugLoc dl = TheCall->getDebugLoc();
+  SmallVector<SDValue, 16> ResultVals;
+  SmallVector<CCValAssign, 16> RVLocs;
+  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
+  CCState CCRetInfo(CallerCC, isVarArg, TM, RVLocs);
+  CCRetInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &VA = RVLocs[i];
+    MVT VT = VA.getValVT();
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    Chain = DAG.getCopyFromReg(Chain, dl,
+                               VA.getLocReg(), VT, InFlag).getValue(1);
+    ResultVals.push_back(Chain.getValue(0));
+    InFlag = Chain.getValue(2);
+  }
+
+  // If the function returns void, just return the chain.
+  if (RVLocs.empty())
+    return Chain;
+
+  // Otherwise, merge everything together with a MERGE_VALUES node.
+  ResultVals.push_back(Chain);
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
+                            &ResultVals[0], ResultVals.size());
+  return Res.getValue(Op.getResNo());
+}
+
+static
+SDValue FinishCall(SelectionDAG &DAG, CallSDNode *TheCall, TargetMachine &TM,
+                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
+                   SDValue Op, SDValue InFlag, SDValue Chain, SDValue &Callee,
+                   int SPDiff, unsigned NumBytes) {
+  unsigned CC = TheCall->getCallingConv();
+  DebugLoc dl = TheCall->getDebugLoc();
+  bool isTailCall = TheCall->isTailCall()
+                 && CC == CallingConv::Fast && PerformTailCallOpt;
+
+  std::vector<MVT> NodeTys;
+  SmallVector<SDValue, 8> Ops;
+  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+                                 isTailCall, RegsToPass, Ops, NodeTys,
+                                 TM.getSubtarget<PPCSubtarget>().isSVR4ABI());
+
+  // When performing tail call optimization the callee pops its arguments off
+  // the stack. Account for this here so these bytes can be pushed back on in
+  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
+  int BytesCalleePops =
+    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
+
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  // Emit tail call.
+  if (isTailCall) {
+    assert(InFlag.getNode() &&
+           "Flag must be set. Depend on flag being set in LowerRET");
+    Chain = DAG.getNode(PPCISD::TAILCALL, dl,
+                        TheCall->getVTList(), &Ops[0], Ops.size());
+    return SDValue(Chain.getNode(), Op.getResNo());
+  }
+
+  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+  InFlag = Chain.getValue(1);
+
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                             DAG.getIntPtrConstant(BytesCalleePops, true),
+                             InFlag);
+  if (TheCall->getValueType(0) != MVT::Other)
+    InFlag = Chain.getValue(1);
+
+  return LowerCallReturn(Op, DAG, TM, TheCall, Chain, InFlag);
+}
+
+SDValue PPCTargetLowering::LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
+                                          const PPCSubtarget &Subtarget,
+                                          TargetMachine &TM) {
+  // See PPCTargetLowering::LowerFORMAL_ARGUMENTS_SVR4() for a description
+  // of the SVR4 ABI stack frame layout.
   CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
   SDValue Chain  = TheCall->getChain();
   bool isVarArg   = TheCall->isVarArg();
   unsigned CC     = TheCall->getCallingConv();
+  assert((CC == CallingConv::C ||
+          CC == CallingConv::Fast) && "Unknown calling convention!");
   bool isTailCall = TheCall->isTailCall()
                  && CC == CallingConv::Fast && PerformTailCallOpt;
   SDValue Callee = TheCall->getCallee();
-  unsigned NumOps  = TheCall->getNumArgs();
   DebugLoc dl = TheCall->getDebugLoc();
 
-  bool isMachoABI = Subtarget.isMachoABI();
-  bool isELF32_ABI  = Subtarget.isELF32_ABI();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  unsigned PtrByteSize = 4;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Mark this function as potentially containing a function that contains a
+  // tail call. As a consequence the frame pointer will be used for dynamicalloc
+  // and restoring the callers stack pointer in this functions epilog. This is
+  // done because by tail calling the called function might overwrite the value
+  // in this function's (MF) stack pointer stack slot 0(SP).
+  if (PerformTailCallOpt && CC==CallingConv::Fast)
+    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+  
+  // Count how many bytes are to be pushed on the stack, including the linkage
+  // area, parameter list area and the part of the local variable space which
+  // contains copies of aggregates which are passed by value.
+
+  // Assign locations to all of the outgoing arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
+
+  // Reserve space for the linkage area on the stack.
+  CCInfo.AllocateStack(PPCFrameInfo::getLinkageSize(false, false), PtrByteSize);
+
+  if (isVarArg) {
+    // Handle fixed and variable vector arguments differently.
+    // Fixed vector arguments go into registers as long as registers are
+    // available. Variable vector arguments always go into memory.
+    unsigned NumArgs = TheCall->getNumArgs();
+    unsigned NumFixedArgs = TheCall->getNumFixedArgs();
+    
+    for (unsigned i = 0; i != NumArgs; ++i) {
+      MVT ArgVT = TheCall->getArg(i).getValueType();
+      ISD::ArgFlagsTy ArgFlags = TheCall->getArgFlags(i);
+      bool Result;
+      
+      if (i < NumFixedArgs) {
+        Result = CC_PPC_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+                             CCInfo);
+      } else {
+        Result = CC_PPC_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+                                    ArgFlags, CCInfo);
+      }
+      
+      if (Result) {
+        cerr << "Call operand #" << i << " has unhandled type "
+             << ArgVT.getMVTString() << "\n";
+        abort();
+      }
+    }
+  } else {
+    // All arguments are treated the same.
+    CCInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4);
+  }
+  
+  // Assign locations to all of the outgoing aggregate by value arguments.
+  SmallVector<CCValAssign, 16> ByValArgLocs;
+  CCState CCByValInfo(CC, isVarArg, getTargetMachine(), ByValArgLocs);
+
+  // Reserve stack space for the allocations in CCInfo.
+  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+  CCByValInfo.AnalyzeCallOperands(TheCall, CC_PPC_SVR4_ByVal);
+
+  // Size of the linkage area, parameter list area and the part of the local
+  // space variable where copies of aggregates which are passed by value are
+  // stored.
+  unsigned NumBytes = CCByValInfo.getNextStackOffset();
+  
+  // Calculate by how many bytes the stack has to be adjusted in case of tail
+  // call optimization.
+  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+  // Adjust the stack pointer for the new arguments...
+  // These operations are automatically eliminated by the prolog/epilog pass
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  SDValue CallSeqStart = Chain;
+
+  // Load the return address and frame pointer so it can be moved somewhere else
+  // later.
+  SDValue LROp, FPOp;
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+                                       dl);
+
+  // Set up a copy of the stack pointer for use loading and storing any
+  // arguments that may not fit in the registers available for argument
+  // passing.
+  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+  
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned i = 0, j = 0, e = ArgLocs.size();
+       i != e;
+       ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = TheCall->getArg(i);
+    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
+    
+    if (Flags.isByVal()) {
+      // Argument is an aggregate which is passed by value, thus we need to
+      // create a copy of it in the local variable space of the current stack
+      // frame (which is the stack frame of the caller) and pass the address of
+      // this copy to the callee.
+      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+      CCValAssign &ByValVA = ByValArgLocs[j++];
+      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+      
+      // Memory reserved in the local variable space of the callers stack frame.
+      unsigned LocMemOffset = ByValVA.getLocMemOffset();
+      
+      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+      
+      // Create a copy of the argument in the local area of the current
+      // stack frame.
+      SDValue MemcpyCall =
+        CreateCopyOfByValArgument(Arg, PtrOff,
+                                  CallSeqStart.getNode()->getOperand(0),
+                                  Flags, DAG, dl);
+      
+      // This must go outside the CALLSEQ_START..END.
+      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+                           CallSeqStart.getNode()->getOperand(1));
+      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+                             NewCallSeqStart.getNode());
+      Chain = CallSeqStart = NewCallSeqStart;
+      
+      // Pass the address of the aggregate copy on the stack either in a
+      // physical register or in the parameter list area of the current stack
+      // frame to the callee.
+      Arg = PtrOff;
+    }
+    
+    if (VA.isRegLoc()) {
+      // Put argument in a physical register.
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else {
+      // Put argument in the parameter list area of the current stack frame.
+      assert(VA.isMemLoc());
+      unsigned LocMemOffset = VA.getLocMemOffset();
+
+      if (!isTailCall) {
+        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                              PseudoSourceValue::getStack(), LocMemOffset));
+      } else {
+        // Calculate and remember argument location.
+        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+                                 TailCallArguments);
+      }
+    }
+  }
+  
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                        &MemOpChains[0], MemOpChains.size());
+  
+  // Build a sequence of copy-to-reg nodes chained together with token chain
+  // and flag operands which copy the outgoing args into the appropriate regs.
+  SDValue InFlag;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+                             RegsToPass[i].second, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+  
+  // Set CR6 to true if this is a vararg call.
+  if (isVarArg) {
+    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
+    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
+  if (isTailCall) {
+    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+                    false, TailCallArguments);
+  }
+
+  return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
+                    SPDiff, NumBytes);
+}
+
+SDValue PPCTargetLowering::LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
+                                            const PPCSubtarget &Subtarget,
+                                            TargetMachine &TM) {
+  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
+  SDValue Chain  = TheCall->getChain();
+  bool isVarArg   = TheCall->isVarArg();
+  unsigned CC     = TheCall->getCallingConv();
+  bool isTailCall = TheCall->isTailCall()
+                 && CC == CallingConv::Fast && PerformTailCallOpt;
+  SDValue Callee = TheCall->getCallee();
+  unsigned NumOps  = TheCall->getNumArgs();
+  DebugLoc dl = TheCall->getDebugLoc();
 
   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   bool isPPC64 = PtrVT == MVT::i64;
@@ -2144,10 +2745,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
 
   MachineFunction &MF = DAG.getMachineFunction();
 
-  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
-  // SelectExpr to use to put the arguments in the appropriate registers.
-  std::vector<SDValue> args_to_use;
-
   // Mark this function as potentially containing a function that contains a
   // tail call. As a consequence the frame pointer will be used for dynamicalloc
   // and restoring the callers stack pointer in this functions epilog. This is
@@ -2162,8 +2759,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
   // area, and parameter passing area.  We start with 24/48 bytes, which is
   // prereserved space for [SP][CR][LR][3 x unused].
   unsigned NumBytes =
-    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
-                                         TheCall, nAltivecParamsAtEnd);
+    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CC, TheCall,
+                                         nAltivecParamsAtEnd);
 
   // Calculate by how many bytes the stack has to be adjusted in case of tail
   // call optimization.
@@ -2177,7 +2774,8 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
   // Load the return address and frame pointer so it can be move somewhere else
   // later.
   SDValue LROp, FPOp;
-  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
+  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+                                       dl);
 
   // Set up a copy of the stack pointer for use loading and storing any
   // arguments that may not fit in the registers available for argument
@@ -2192,7 +2790,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
   // memory.  Also, if this is a vararg function, floating point operations
   // must be stored to our stack, and loaded into integer regs as well, if
   // any integer regs are available for argument passing.
-  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
+  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, true);
   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
 
   static const unsigned GPR_32[] = {           // 32-bit registers.
@@ -2210,12 +2808,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
   const unsigned NumGPRs = array_lengthof(GPR_32);
-  const unsigned NumFPRs = isMachoABI ? 13 : 8;
-  const unsigned NumVRs  = array_lengthof( VR);
+  const unsigned NumFPRs = 13;
+  const unsigned NumVRs  = array_lengthof(VR);
 
   const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
 
-  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
 
   SmallVector<SDValue, 8> MemOpChains;
@@ -2223,19 +2821,12 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
     bool inMem = false;
     SDValue Arg = TheCall->getArg(i);
     ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
-    // See if next argument requires stack alignment in ELF
-    bool Align = Flags.isSplit();
 
     // PtrOff will be used to store the current argument to the stack if a
     // register cannot be found for it.
     SDValue PtrOff;
 
-    // Stack align in ELF 32
-    if (isELF32_ABI && Align)
-      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
-                               StackPtr.getValueType());
-    else
-      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
 
     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
 
@@ -2246,11 +2837,9 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
     }
 
-    // FIXME Elf untested, what are alignment rules?
     // FIXME memcpy is used way more than necessary.  Correctness first.
     if (Flags.isByVal()) {
       unsigned Size = Flags.getByValSize();
-      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
       if (Size==1 || Size==2) {
         // Very small objects are passed right-justified.
         // Everything else is passed left-justified.
@@ -2260,14 +2849,14 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
                                           NULL, 0, VT);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
-          if (isMachoABI)
-            ArgOffset += PtrByteSize;
+
+          ArgOffset += PtrByteSize;
         } else {
           SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
           SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
                                 CallSeqStart.getNode()->getOperand(0),
-                                Flags, DAG, Size, dl);
+                                Flags, DAG, dl);
           // This must go outside the CALLSEQ_START..END.
           SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                                CallSeqStart.getNode()->getOperand(1));
@@ -2283,7 +2872,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
       // registers.  (This is not what the doc says.)
       SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                             CallSeqStart.getNode()->getOperand(0),
-                            Flags, DAG, Size, dl);
+                            Flags, DAG, dl);
       // This must go outside the CALLSEQ_START..END.
       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                            CallSeqStart.getNode()->getOperand(1));
@@ -2297,8 +2886,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
           MemOpChains.push_back(Load.getValue(1));
           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
-          if (isMachoABI)
-            ArgOffset += PtrByteSize;
+          ArgOffset += PtrByteSize;
         } else {
           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
           break;
@@ -2311,8 +2899,6 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
     default: assert(0 && "Unexpected ValueType for argument!");
     case MVT::i32:
     case MVT::i64:
-      // Double word align in ELF
-      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
       if (GPR_idx != NumGPRs) {
         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
       } else {
@@ -2321,13 +2907,7 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
                          TailCallArguments, dl);
         inMem = true;
       }
-      if (inMem || isMachoABI) {
-        // Stack align in ELF
-        if (isELF32_ABI && Align)
-          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
-
-        ArgOffset += PtrByteSize;
-      }
+      ArgOffset += PtrByteSize;
       break;
     case MVT::f32:
     case MVT::f64:
@@ -2342,28 +2922,24 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
           if (GPR_idx != NumGPRs) {
             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
             MemOpChains.push_back(Load.getValue(1));
-            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
-                                                                Load));
+            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
           }
           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
             MemOpChains.push_back(Load.getValue(1));
-            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
-                                                                Load));
+            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
           }
         } else {
           // If we have any FPRs remaining, we may also have GPRs remaining.
           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
           // GPRs.
-          if (isMachoABI) {
-            if (GPR_idx != NumGPRs)
-              ++GPR_idx;
-            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
-                !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
-              ++GPR_idx;
-          }
+          if (GPR_idx != NumGPRs)
+            ++GPR_idx;
+          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
+            ++GPR_idx;
         }
       } else {
         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -2371,15 +2947,10 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
                          TailCallArguments, dl);
         inMem = true;
       }
-      if (inMem || isMachoABI) {
-        // Stack align in ELF
-        if (isELF32_ABI && Align)
-          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
-        if (isPPC64)
-          ArgOffset += 8;
-        else
-          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
-      }
+      if (isPPC64)
+        ArgOffset += 8;
+      else
+        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
       break;
     case MVT::v4f32:
     case MVT::v4i32:
@@ -2475,148 +3046,13 @@ SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
     InFlag = Chain.getValue(1);
   }
 
-  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
-  if (isVarArg && isELF32_ABI) {
-    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
-    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
-  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
-  // might overwrite each other in case of tail call optimization.
   if (isTailCall) {
-    SmallVector<SDValue, 8> MemOpChains2;
-    // Do not flag preceeding copytoreg stuff together with the following stuff.
-    InFlag = SDValue();
-    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
-                                      MemOpChains2, dl);
-    if (!MemOpChains2.empty())
-      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                          &MemOpChains2[0], MemOpChains2.size());
-
-    // Store the return address to the appropriate stack slot.
-    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
-                                          isPPC64, isMachoABI, dl);
-  }
-
-  // Emit callseq_end just before tailcall node.
-  if (isTailCall) {
-    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                               DAG.getIntPtrConstant(0, true), InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
-  std::vector<MVT> NodeTys;
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
-
-  SmallVector<SDValue, 8> Ops;
-  unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;
-
-  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
-  // node so that legalize doesn't hack it.
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
-  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
-    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
-  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
-    // If this is an absolute destination address, use the munged value.
-    Callee = SDValue(Dest, 0);
-  else {
-    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
-    // to do the call, we can't use PPCISD::CALL.
-    SDValue MTCTROps[] = {Chain, Callee, InFlag};
-    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
-                        2 + (InFlag.getNode() != 0));
-    InFlag = Chain.getValue(1);
-
-    // Copy the callee address into R12/X12 on darwin.
-    if (isMachoABI) {
-      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
-      Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
-      InFlag = Chain.getValue(1);
-    }
-
-    NodeTys.clear();
-    NodeTys.push_back(MVT::Other);
-    NodeTys.push_back(MVT::Flag);
-    Ops.push_back(Chain);
-    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
-    Callee.setNode(0);
-    // Add CTR register as callee so a bctr can be emitted later.
-    if (isTailCall)
-      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
-  }
-
-  // If this is a direct call, pass the chain and the callee.
-  if (Callee.getNode()) {
-    Ops.push_back(Chain);
-    Ops.push_back(Callee);
+    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+                    FPOp, true, TailCallArguments);
   }
-  // If this is a tail call add stack pointer delta.
-  if (isTailCall)
-    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
 
-  // Add argument registers to the end of the list so that they are known live
-  // into the call.
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
-    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
-                                  RegsToPass[i].second.getValueType()));
-
-  // When performing tail call optimization the callee pops its arguments off
-  // the stack. Account for this here so these bytes can be pushed back on in
-  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
-  int BytesCalleePops =
-    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;
-
-  if (InFlag.getNode())
-    Ops.push_back(InFlag);
-
-  // Emit tail call.
-  if (isTailCall) {
-    assert(InFlag.getNode() &&
-           "Flag must be set. Depend on flag being set in LowerRET");
-    Chain = DAG.getNode(PPCISD::TAILCALL, dl,
-                        TheCall->getVTList(), &Ops[0], Ops.size());
-    return SDValue(Chain.getNode(), Op.getResNo());
-  }
-
-  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
-  InFlag = Chain.getValue(1);
-
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(BytesCalleePops, true),
-                             InFlag);
-  if (TheCall->getValueType(0) != MVT::Other)
-    InFlag = Chain.getValue(1);
-
-  SmallVector<SDValue, 16> ResultVals;
-  SmallVector<CCValAssign, 16> RVLocs;
-  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
-  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
-  CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);
-
-  // Copy all of the result registers out of their specified physreg.
-  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
-    CCValAssign &VA = RVLocs[i];
-    MVT VT = VA.getValVT();
-    assert(VA.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyFromReg(Chain, dl,
-                               VA.getLocReg(), VT, InFlag).getValue(1);
-    ResultVals.push_back(Chain.getValue(0));
-    InFlag = Chain.getValue(2);
-  }
-
-  // If the function returns void, just return the chain.
-  if (RVLocs.empty())
-    return Chain;
-
-  // Otherwise, merge everything together with a MERGE_VALUES node.
-  ResultVals.push_back(Chain);
-  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
-                            &ResultVals[0], ResultVals.size());
-  return Res.getValue(Op.getResNo());
+  return FinishCall(DAG, TheCall, TM, RegsToPass, Op, InFlag, Chain, Callee,
+                    SPDiff, NumBytes);
 }
 
 SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
@@ -2716,7 +3152,7 @@ SDValue
 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool IsPPC64 = PPCSubTarget.isPPC64();
-  bool isMachoABI = PPCSubTarget.isMachoABI();
+  bool isDarwinABI = PPCSubTarget.isDarwinABI();
   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 
   // Get current frame pointer save index.  The users of this index will be
@@ -2727,7 +3163,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
   // If the frame pointer save index hasn't been defined yet.
   if (!RASI) {
     // Find out what the fix offset of the frame pointer save area.
-    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
+    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
     // Allocate the frame index for frame pointer save area.
     RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
     // Save the result.
@@ -2740,7 +3176,7 @@ SDValue
 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
   MachineFunction &MF = DAG.getMachineFunction();
   bool IsPPC64 = PPCSubTarget.isPPC64();
-  bool isMachoABI = PPCSubTarget.isMachoABI();
+  bool isDarwinABI = PPCSubTarget.isDarwinABI();
   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 
   // Get current frame pointer save index.  The users of this index will be
@@ -2751,7 +3187,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
   // If the frame pointer save index hasn't been defined yet.
   if (!FPSI) {
     // Find out what the fix offset of the frame pointer save area.
-    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);
+    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
+                                                           isDarwinABI);
 
     // Allocate the frame index for frame pointer save area.
     FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
@@ -3729,12 +4166,22 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
                       VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);
 
   case ISD::FORMAL_ARGUMENTS:
-    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
-                                 VarArgsStackOffset, VarArgsNumGPR,
-                                 VarArgsNumFPR, PPCSubTarget);
+    if (PPCSubTarget.isSVR4ABI()) {
+      return LowerFORMAL_ARGUMENTS_SVR4(Op, DAG, VarArgsFrameIndex,
+                                        VarArgsStackOffset, VarArgsNumGPR,
+                                        VarArgsNumFPR, PPCSubTarget);
+    } else {
+      return LowerFORMAL_ARGUMENTS_Darwin(Op, DAG, VarArgsFrameIndex,
+                                          PPCSubTarget);
+    }
 
-  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget,
-                                                 getTargetMachine());
+  case ISD::CALL:
+    if (PPCSubTarget.isSVR4ABI()) {
+      return LowerCALL_SVR4(Op, DAG, PPCSubTarget, getTargetMachine());
+    } else {
+      return LowerCALL_Darwin(Op, DAG, PPCSubTarget, getTargetMachine());
+    }
+    
   case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
   case ISD::DYNAMIC_STACKALLOC:
@@ -4878,3 +5325,13 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The PowerPC target isn't yet aware of offsets.
   return false;
 }
+
+MVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
+                                           bool isSrcConst, bool isSrcStr,
+                                           SelectionDAG &DAG) const {
+  if (this->PPCSubTarget.isPPC64()) {
+    return MVT::i64;
+  } else {
+    return MVT::i32;
+  }
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b6d046f..962bbb1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -82,7 +82,7 @@ namespace llvm {
       STD_32,
       
       /// CALL - A direct function call.
-      CALL_Macho, CALL_ELF,
+      CALL_Darwin, CALL_SVR4,
       
       /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
       /// MTCTR instruction.
@@ -90,7 +90,7 @@ namespace llvm {
       
       /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
       /// BCTRL instruction.
-      BCTRL_Macho, BCTRL_ELF,
+      BCTRL_Darwin, BCTRL_SVR4,
       
       /// Return with a flag operand, matched by 'blr'
       RET_FLAG,
@@ -223,7 +223,6 @@ namespace llvm {
                                       // register for parameter passing.
     unsigned VarArgsNumFPR;           // Index of the first unused double
                                       // register for parameter passing.
-    int ReturnAddrIndex;              // FrameIndex for return slot.
     const PPCSubtarget &PPCSubTarget;
   public:
     explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -336,6 +335,13 @@ namespace llvm {
                                                    SelectionDAG &DAG) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+    
+    virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
+                                    bool isSrcConst, bool isSrcStr,
+                                    SelectionDAG &DAG) const;
+
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
 
   private:
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
@@ -346,6 +352,7 @@ namespace llvm {
                                          SDValue Chain,
                                          SDValue &LROpOut,
                                          SDValue &FPOpOut,
+                                         bool isDarwinABI,
                                          DebugLoc dl);
 
     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
@@ -363,14 +370,19 @@ namespace llvm {
     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex,
                          int VarArgsStackOffset, unsigned VarArgsNumGPR,
                          unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget);
-    SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG,
-                                    int &VarArgsFrameIndex, 
-                                    int &VarArgsStackOffset,
-                                    unsigned &VarArgsNumGPR,
-                                    unsigned &VarArgsNumFPR,
-                                    const PPCSubtarget &Subtarget);
-    SDValue LowerCALL(SDValue Op, SelectionDAG &DAG,
-                        const PPCSubtarget &Subtarget, TargetMachine &TM);
+    SDValue LowerFORMAL_ARGUMENTS_SVR4(SDValue Op, SelectionDAG &DAG,
+                                       int &VarArgsFrameIndex, 
+                                       int &VarArgsStackOffset,
+                                       unsigned &VarArgsNumGPR,
+                                       unsigned &VarArgsNumFPR,
+                                       const PPCSubtarget &Subtarget);
+    SDValue LowerFORMAL_ARGUMENTS_Darwin(SDValue Op, SelectionDAG &DAG,
+                                         int &VarArgsFrameIndex, 
+                                         const PPCSubtarget &Subtarget);
+    SDValue LowerCALL_Darwin(SDValue Op, SelectionDAG &DAG,
+                             const PPCSubtarget &Subtarget, TargetMachine &TM);
+    SDValue LowerCALL_SVR4(SDValue Op, SelectionDAG &DAG,
+                           const PPCSubtarget &Subtarget, TargetMachine &TM);
     SDValue LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM);
     SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 417c8ed..3823e53 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -61,7 +61,7 @@ let Defs = [LR8] in
   def MovePCtoLR8 : Pseudo<(outs), (ins piclabel:$label), "bl $label", []>,
                     PPC970_Unit_BRU;
 
-// Macho ABI Calls.
+// Darwin ABI Calls.
 let isCall = 1, PPC970_Unit = 7, 
   // All calls clobber the PPC64 non-callee saved registers.
   Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
@@ -71,22 +71,22 @@ let isCall = 1, PPC970_Unit = 7,
           CR0,CR1,CR5,CR6,CR7] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
-    def BL8_Macho  : IForm<18, 0, 1,
-                           (outs), (ins calltarget:$func, variable_ops), 
-                           "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA8_Macho : IForm<18, 1, 1,
-                           (outs), (ins aaddr:$func, variable_ops),
-                           "bla $func", BrB, [(PPCcall_Macho (i64 imm:$func))]>;
+    def BL8_Darwin  : IForm<18, 0, 1,
+                            (outs), (ins calltarget:$func, variable_ops), 
+                            "bl $func", BrB, []>;  // See Pat patterns below.
+    def BLA8_Darwin : IForm<18, 1, 1,
+                          (outs), (ins aaddr:$func, variable_ops),
+                          "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>;
   }
   let Uses = [CTR8, RM] in {
-    def BCTRL8_Macho : XLForm_2_ext<19, 528, 20, 0, 1, 
-                                 (outs), (ins variable_ops),
-                                 "bctrl", BrB,
-                                 [(PPCbctrl_Macho)]>, Requires<[In64BitMode]>;
+    def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, 
+                                  (outs), (ins variable_ops),
+                                  "bctrl", BrB,
+                                  [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>;
   }
 }
 
-// ELF 64 ABI Calls = Macho ABI Calls
+// ELF 64 ABI Calls = Darwin ABI Calls
 // Used to define BL8_ELF and BLA8_ELF
 let isCall = 1, PPC970_Unit = 7, 
   // All calls clobber the PPC64 non-callee saved registers.
@@ -102,26 +102,26 @@ let isCall = 1, PPC970_Unit = 7,
                          "bl $func", BrB, []>;  // See Pat patterns below.                            
     def BLA8_ELF : IForm<18, 1, 1,
                          (outs), (ins aaddr:$func, variable_ops),
-                         "bla $func", BrB, [(PPCcall_ELF (i64 imm:$func))]>;
+                         "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
   }
   let Uses = [CTR8, RM] in {
     def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
                                (outs), (ins variable_ops),
                                "bctrl", BrB,
-                               [(PPCbctrl_ELF)]>, Requires<[In64BitMode]>;
+                               [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>;
   }
 }
 
 
 // Calls
-def : Pat<(PPCcall_Macho (i64 tglobaladdr:$dst)),
-          (BL8_Macho tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Macho (i64 texternalsym:$dst)),
-          (BL8_Macho texternalsym:$dst)>;
+def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)),
+          (BL8_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
+          (BL8_Darwin texternalsym:$dst)>;
 
-def : Pat<(PPCcall_ELF (i64 tglobaladdr:$dst)),
+def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
           (BL8_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_ELF (i64 texternalsym:$dst)),
+def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
           (BL8_ELF texternalsym:$dst)>;
 
 // Atomic operations
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 778f034..87c612a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -444,21 +444,29 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
     // not cause any bug. If we need other uses of CR bits, the following
     // code may be invalid.
     unsigned Reg = 0;
-    if (SrcReg >= PPC::CR0LT || SrcReg <= PPC::CR0UN) 
+    if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+        SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
       Reg = PPC::CR0;
-    else if (SrcReg >= PPC::CR1LT || SrcReg <= PPC::CR1UN) 
+    else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+             SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
       Reg = PPC::CR1;
-    else if (SrcReg >= PPC::CR2LT || SrcReg <= PPC::CR2UN) 
+    else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+             SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
       Reg = PPC::CR2;
-    else if (SrcReg >= PPC::CR3LT || SrcReg <= PPC::CR3UN) 
+    else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+             SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
       Reg = PPC::CR3;
-    else if (SrcReg >= PPC::CR4LT || SrcReg <= PPC::CR4UN) 
+    else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+             SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
       Reg = PPC::CR4;
-    else if (SrcReg >= PPC::CR5LT || SrcReg <= PPC::CR5UN) 
+    else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+             SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
       Reg = PPC::CR5;
-    else if (SrcReg >= PPC::CR6LT || SrcReg <= PPC::CR6UN) 
+    else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+             SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
       Reg = PPC::CR6;
-    else if (SrcReg >= PPC::CR7LT || SrcReg <= PPC::CR7UN) 
+    else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+             SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
       Reg = PPC::CR7;
 
     return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, 
@@ -587,21 +595,29 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
   } else if (RC == PPC::CRBITRCRegisterClass) {
    
     unsigned Reg = 0;
-    if (DestReg >= PPC::CR0LT || DestReg <= PPC::CR0UN) 
+    if (DestReg == PPC::CR0LT || DestReg == PPC::CR0GT ||
+        DestReg == PPC::CR0EQ || DestReg == PPC::CR0UN)
       Reg = PPC::CR0;
-    else if (DestReg >= PPC::CR1LT || DestReg <= PPC::CR1UN) 
+    else if (DestReg == PPC::CR1LT || DestReg == PPC::CR1GT ||
+             DestReg == PPC::CR1EQ || DestReg == PPC::CR1UN)
       Reg = PPC::CR1;
-    else if (DestReg >= PPC::CR2LT || DestReg <= PPC::CR2UN) 
+    else if (DestReg == PPC::CR2LT || DestReg == PPC::CR2GT ||
+             DestReg == PPC::CR2EQ || DestReg == PPC::CR2UN)
       Reg = PPC::CR2;
-    else if (DestReg >= PPC::CR3LT || DestReg <= PPC::CR3UN) 
+    else if (DestReg == PPC::CR3LT || DestReg == PPC::CR3GT ||
+             DestReg == PPC::CR3EQ || DestReg == PPC::CR3UN)
       Reg = PPC::CR3;
-    else if (DestReg >= PPC::CR4LT || DestReg <= PPC::CR4UN) 
+    else if (DestReg == PPC::CR4LT || DestReg == PPC::CR4GT ||
+             DestReg == PPC::CR4EQ || DestReg == PPC::CR4UN)
       Reg = PPC::CR4;
-    else if (DestReg >= PPC::CR5LT || DestReg <= PPC::CR5UN) 
+    else if (DestReg == PPC::CR5LT || DestReg == PPC::CR5GT ||
+             DestReg == PPC::CR5EQ || DestReg == PPC::CR5UN)
       Reg = PPC::CR5;
-    else if (DestReg >= PPC::CR6LT || DestReg <= PPC::CR6UN) 
+    else if (DestReg == PPC::CR6LT || DestReg == PPC::CR6GT ||
+             DestReg == PPC::CR6EQ || DestReg == PPC::CR6UN)
       Reg = PPC::CR6;
-    else if (DestReg >= PPC::CR7LT || DestReg <= PPC::CR7UN) 
+    else if (DestReg == PPC::CR7LT || DestReg == PPC::CR7GT ||
+             DestReg == PPC::CR7EQ || DestReg == PPC::CR7UN)
       Reg = PPC::CR7;
 
     return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, 
@@ -691,16 +707,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (OpNum == 0) {  // move -> store
       unsigned InReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW))
-                                .addReg(InReg, getKillRegState(isKill)),
+                                .addReg(InReg,
+                                        getKillRegState(isKill) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     } else {           // move -> load
       unsigned OutReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ))
                                 .addReg(OutReg,
                                         RegState::Define |
-                                        getDeadRegState(isDead)),
+                                        getDeadRegState(isDead) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     }
   } else if ((Opc == PPC::OR8 &&
@@ -708,48 +729,63 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (OpNum == 0) {  // move -> store
       unsigned InReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD))
-                                .addReg(InReg, getKillRegState(isKill)),
+                                .addReg(InReg,
+                                        getKillRegState(isKill) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     } else {           // move -> load
       unsigned OutReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD))
                                 .addReg(OutReg,
                                         RegState::Define |
-                                        getDeadRegState(isDead)),
+                                        getDeadRegState(isDead) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     }
   } else if (Opc == PPC::FMRD) {
     if (OpNum == 0) {  // move -> store
       unsigned InReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD))
-                                .addReg(InReg, getKillRegState(isKill)),
+                                .addReg(InReg,
+                                        getKillRegState(isKill) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     } else {           // move -> load
       unsigned OutReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD))
                                 .addReg(OutReg,
                                         RegState::Define |
-                                        getDeadRegState(isDead)),
+                                        getDeadRegState(isDead) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     }
   } else if (Opc == PPC::FMRS) {
     if (OpNum == 0) {  // move -> store
       unsigned InReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS))
-                                .addReg(InReg, getKillRegState(isKill)),
+                                .addReg(InReg,
+                                        getKillRegState(isKill) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     } else {           // move -> load
       unsigned OutReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS))
                                 .addReg(OutReg,
                                         RegState::Define |
-                                        getDeadRegState(isDead)),
+                                        getDeadRegState(isDead) |
+                                        getUndefRegState(isUndef)),
                                 FrameIndex);
     }
   }
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 772e25a..7af59a2 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -107,17 +107,17 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_PPCCallSeqEnd,
                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
 def SDT_PPCCall   : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
-def PPCcall_Macho : SDNode<"PPCISD::CALL_Macho", SDT_PPCCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def PPCcall_ELF   : SDNode<"PPCISD::CALL_ELF", SDT_PPCCall,
+def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
+                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCcall_SVR4  : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 def PPCmtctr      : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
-def PPCbctrl_Macho  : SDNode<"PPCISD::BCTRL_Macho", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCbctrl_Darwin  : SDNode<"PPCISD::BCTRL_Darwin", SDTNone,
+                              [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
-def PPCbctrl_ELF  : SDNode<"PPCISD::BCTRL_ELF", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+def PPCbctrl_SVR4  : SDNode<"PPCISD::BCTRL_SVR4", SDTNone,
+                            [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
 
 def retflag       : SDNode<"PPCISD::RET_FLAG", SDTNone,
                            [SDNPHasChain, SDNPOptInFlag]>;
@@ -412,7 +412,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
                   /*[(PPCcondbranch CRRC:$crS, imm:$opc, bb:$dst)]*/>;
 }
 
-// Macho ABI Calls.
+// Darwin ABI Calls.
 let isCall = 1, PPC970_Unit = 7, 
   // All calls clobber the non-callee saved registers...
   Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
@@ -424,26 +424,26 @@ let isCall = 1, PPC970_Unit = 7,
           CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
-    def BL_Macho  : IForm<18, 0, 1,
-                          (outs), (ins calltarget:$func, variable_ops), 
-                          "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA_Macho : IForm<18, 1, 1, 
+    def BL_Darwin  : IForm<18, 0, 1,
+                           (outs), (ins calltarget:$func, variable_ops), 
+                           "bl $func", BrB, []>;  // See Pat patterns below.
+    def BLA_Darwin : IForm<18, 1, 1, 
                           (outs), (ins aaddr:$func, variable_ops),
-                          "bla $func", BrB, [(PPCcall_Macho (i32 imm:$func))]>;
+                          "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>;
   }
   let Uses = [CTR, RM] in {
-    def BCTRL_Macho : XLForm_2_ext<19, 528, 20, 0, 1, 
-                                 (outs), (ins variable_ops),
-                                 "bctrl", BrB,
-                                 [(PPCbctrl_Macho)]>, Requires<[In32BitMode]>;
+    def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, 
+                                  (outs), (ins variable_ops),
+                                  "bctrl", BrB,
+                                  [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>;
   }
 }
 
-// ELF ABI Calls.
+// SVR4 ABI Calls.
 let isCall = 1, PPC970_Unit = 7, 
   // All calls clobber the non-callee saved registers...
-  Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
-          F0,F1,F2,F3,F4,F5,F6,F7,F8,
+  Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
+          F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
           V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
           LR,CTR,
           CR0,CR1,CR5,CR6,CR7,
@@ -451,19 +451,19 @@ let isCall = 1, PPC970_Unit = 7,
           CR5UN,CR6LT,CR6GT,CR6EQ,CR6UN,CR7LT,CR7GT,CR7EQ,CR7UN] in {
   // Convenient aliases for call instructions
   let Uses = [RM] in {
-    def BL_ELF  : IForm<18, 0, 1,
+    def BL_SVR4  : IForm<18, 0, 1,
                         (outs), (ins calltarget:$func, variable_ops), 
                         "bl $func", BrB, []>;  // See Pat patterns below.
-    def BLA_ELF : IForm<18, 1, 1,
+    def BLA_SVR4 : IForm<18, 1, 1,
                         (outs), (ins aaddr:$func, variable_ops),
                         "bla $func", BrB,
-                        [(PPCcall_ELF (i32 imm:$func))]>;
+                        [(PPCcall_SVR4 (i32 imm:$func))]>;
   }
   let Uses = [CTR, RM] in {
-    def BCTRL_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
-                               (outs), (ins variable_ops),
-                               "bctrl", BrB,
-                               [(PPCbctrl_ELF)]>, Requires<[In32BitMode]>;
+    def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1,
+                                (outs), (ins variable_ops),
+                                "bctrl", BrB,
+                                [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>;
   }
 }
 
@@ -1389,14 +1389,14 @@ def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm),
           (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
 
 // Calls
-def : Pat<(PPCcall_Macho (i32 tglobaladdr:$dst)),
-          (BL_Macho tglobaladdr:$dst)>;
-def : Pat<(PPCcall_Macho (i32 texternalsym:$dst)),
-          (BL_Macho texternalsym:$dst)>;
-def : Pat<(PPCcall_ELF (i32 tglobaladdr:$dst)),
-          (BL_ELF tglobaladdr:$dst)>;
-def : Pat<(PPCcall_ELF (i32 texternalsym:$dst)),
-          (BL_ELF texternalsym:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)),
+          (BL_Darwin tglobaladdr:$dst)>;
+def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)),
+          (BL_Darwin texternalsym:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)),
+          (BL_SVR4 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)),
+          (BL_SVR4 texternalsym:$dst)>;
 
 
 def : Pat<(PPCtc_return (i32 tglobaladdr:$dst),  imm:$imm),
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 035647e..7486d74 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -354,7 +354,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
     JCE.emitWordBE(0xf821ffb1);     // stdu r1,-80(r1)
     JCE.emitWordBE(0x7d6802a6);     // mflr r11
     JCE.emitWordBE(0xf9610060);     // std r11, 96(r1)
-  } else if (TM.getSubtargetImpl()->isMachoABI()){
+  } else if (TM.getSubtargetImpl()->isDarwinABI()){
     JCE.emitWordBE(0x9421ffe0);     // stwu r1,-32(r1)
     JCE.emitWordBE(0x7d6802a6);     // mflr r11
     JCE.emitWordBE(0x91610028);     // stw r11, 40(r1)
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index cb31506..97b1c57 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -149,7 +149,7 @@ const TargetRegisterClass *PPCRegisterInfo::getPointerRegClass() const {
 const unsigned*
 PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   // 32-bit Darwin calling convention. 
-  static const unsigned Macho32_CalleeSavedRegs[] = {
+  static const unsigned Darwin32_CalleeSavedRegs[] = {
               PPC::R13, PPC::R14, PPC::R15,
     PPC::R16, PPC::R17, PPC::R18, PPC::R19,
     PPC::R20, PPC::R21, PPC::R22, PPC::R23,
@@ -174,15 +174,13 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     PPC::LR,  0
   };
   
-  static const unsigned ELF32_CalleeSavedRegs[] = {
-              PPC::R13, PPC::R14, PPC::R15,
+  static const unsigned SVR4_CalleeSavedRegs[] = {
+                        PPC::R14, PPC::R15,
     PPC::R16, PPC::R17, PPC::R18, PPC::R19,
     PPC::R20, PPC::R21, PPC::R22, PPC::R23,
     PPC::R24, PPC::R25, PPC::R26, PPC::R27,
     PPC::R28, PPC::R29, PPC::R30, PPC::R31,
 
-                                  PPC::F9,
-    PPC::F10, PPC::F11, PPC::F12, PPC::F13,
     PPC::F14, PPC::F15, PPC::F16, PPC::F17,
     PPC::F18, PPC::F19, PPC::F20, PPC::F21,
     PPC::F22, PPC::F23, PPC::F24, PPC::F25,
@@ -190,6 +188,9 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     PPC::F30, PPC::F31,
     
     PPC::CR2, PPC::CR3, PPC::CR4,
+    
+    PPC::VRSAVE,
+    
     PPC::V20, PPC::V21, PPC::V22, PPC::V23,
     PPC::V24, PPC::V25, PPC::V26, PPC::V27,
     PPC::V28, PPC::V29, PPC::V30, PPC::V31,
@@ -201,7 +202,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     PPC::LR,  0
   };
   // 64-bit Darwin calling convention. 
-  static const unsigned Macho64_CalleeSavedRegs[] = {
+  static const unsigned Darwin64_CalleeSavedRegs[] = {
     PPC::X14, PPC::X15,
     PPC::X16, PPC::X17, PPC::X18, PPC::X19,
     PPC::X20, PPC::X21, PPC::X22, PPC::X23,
@@ -226,18 +227,17 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     PPC::LR8,  0
   };
   
-  if (Subtarget.isMachoABI())
-    return Subtarget.isPPC64() ? Macho64_CalleeSavedRegs :
-                                 Macho32_CalleeSavedRegs;
+  if (Subtarget.isDarwinABI())
+    return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
+                                 Darwin32_CalleeSavedRegs;
   
-  // ELF 32.
-  return ELF32_CalleeSavedRegs;
+  return SVR4_CalleeSavedRegs;
 }
 
 const TargetRegisterClass* const*
 PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  // 32-bit Macho calling convention. 
-  static const TargetRegisterClass * const Macho32_CalleeSavedRegClasses[] = {
+  // 32-bit Darwin calling convention.
+  static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = {
                        &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
@@ -266,15 +266,13 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     &PPC::GPRCRegClass, 0
   };
   
-  static const TargetRegisterClass * const ELF32_CalleeSavedRegClasses[] = {
-                       &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
+  static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = {
+                                          &PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
     &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
 
-                                                             &PPC::F8RCRegClass,
-    &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
     &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
     &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
     &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
@@ -283,6 +281,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     
     &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
     
+    &PPC::VRSAVERCRegClass,
+    
     &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
     &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
     &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
@@ -297,8 +297,8 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     &PPC::GPRCRegClass, 0
   };
   
-  // 64-bit Macho calling convention. 
-  static const TargetRegisterClass * const Macho64_CalleeSavedRegClasses[] = {
+  // 64-bit Darwin calling convention.
+  static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = {
     &PPC::G8RCRegClass,&PPC::G8RCRegClass,
     &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
     &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
@@ -327,12 +327,11 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     &PPC::G8RCRegClass, 0
   };
   
-  if (Subtarget.isMachoABI())
-    return Subtarget.isPPC64() ? Macho64_CalleeSavedRegClasses :
-                                 Macho32_CalleeSavedRegClasses;
+  if (Subtarget.isDarwinABI())
+    return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses :
+                                 Darwin32_CalleeSavedRegClasses;
   
-  // ELF 32.
-  return ELF32_CalleeSavedRegClasses;
+  return SVR4_CalleeSavedRegClasses;
 }
 
 // needsFP - Return true if the specified function should have a dedicated frame
@@ -358,10 +357,12 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   Reserved.set(PPC::LR8);
   Reserved.set(PPC::RM);
 
-  // In Linux, r2 is reserved for the OS.
-  if (!Subtarget.isDarwin())
-    Reserved.set(PPC::R2);
-
+  // The SVR4 ABI reserves r2 and r13
+  if (Subtarget.isSVR4ABI()) {
+    Reserved.set(PPC::R2);  // System-reserved register
+    Reserved.set(PPC::R13); // Small Data Area pointer register
+  }
+  
   // On PPC64, r13 is the thread pointer. Never allocate this register. Note
   // that this is over conservative, as it also prevents allocation of R31 when
   // the FP is not needed.
@@ -909,6 +910,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
   // to adjust the stack pointer (we fit in the Red Zone).
   bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
+  // FIXME SVR4 The SVR4 ABI has no red zone.
   if (!DisableRedZone &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
@@ -925,7 +927,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   // Maximum call frame needs to be at least big enough for linkage and 8 args.
   unsigned minCallFrameSize =
     PPCFrameInfo::getMinCallFrameSize(Subtarget.isPPC64(), 
-                                      Subtarget.isMachoABI());
+                                      Subtarget.isDarwinABI());
   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
 
   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
@@ -958,16 +960,15 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   //  Save R31 if necessary
   int FPSI = FI->getFramePointerSaveIndex();
   bool IsPPC64 = Subtarget.isPPC64();
-  bool IsELF32_ABI = Subtarget.isELF32_ABI();
-  bool IsMachoABI  = Subtarget.isMachoABI();
+  bool IsSVR4ABI = Subtarget.isSVR4ABI();
+  bool isDarwinABI  = Subtarget.isDarwinABI();
   MachineFrameInfo *MFI = MF.getFrameInfo();
  
   // If the frame pointer save index hasn't been defined yet.
-  if (!FPSI && (NoFramePointerElim || MFI->hasVarSizedObjects()) &&
-      IsELF32_ABI) {
+  if (!FPSI && needsFP(MF) && IsSVR4ABI) {
     // Find out what the fix offset of the frame pointer save area.
     int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
-                                                           IsMachoABI);
+                                                           isDarwinABI);
     // Allocate the frame index for frame pointer save area.
     FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
     // Save the result.
@@ -976,11 +977,10 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   // Reserve stack space to move the linkage area to in case of a tail call.
   int TCSPDelta = 0;
-  if (PerformTailCallOpt && (TCSPDelta=FI->getTailCallSPDelta()) < 0) {
-    int AddFPOffsetAmount = IsELF32_ABI ? -4 : 0;
-    MF.getFrameInfo()->CreateFixedObject( -1 * TCSPDelta,
-                                          AddFPOffsetAmount + TCSPDelta);
+  if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+    MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta);
   }
+  
   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
   // a large stack, which will require scavenging a register to materialize a
   // large offset.
@@ -999,6 +999,170 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 }
 
 void
+PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+                                                     const {
+  // Early exit if not using the SVR4 ABI.
+  if (!Subtarget.isSVR4ABI()) {
+    return;
+  }
+  
+  // Get callee saved register information.
+  MachineFrameInfo *FFI = MF.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty() && !needsFP(MF)) {
+    return;
+  }
+  
+  unsigned MinGPR = PPC::R31;
+  unsigned MinFPR = PPC::F31;
+  unsigned MinVR = PPC::V31;
+  
+  bool HasGPSaveArea = false;
+  bool HasFPSaveArea = false;
+  bool HasCRSaveArea = false;
+  bool HasVRSAVESaveArea = false;
+  bool HasVRSaveArea = false;
+  
+  SmallVector<CalleeSavedInfo, 18> GPRegs;
+  SmallVector<CalleeSavedInfo, 18> FPRegs;
+  SmallVector<CalleeSavedInfo, 18> VRegs;
+  
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    unsigned Reg = CSI[i].getReg();
+    const TargetRegisterClass *RC = CSI[i].getRegClass();
+    
+    if (RC == PPC::GPRCRegisterClass) {
+      HasGPSaveArea = true;
+      
+      GPRegs.push_back(CSI[i]);
+      
+      if (Reg < MinGPR) {
+        MinGPR = Reg;
+      }
+    } else if (RC == PPC::F8RCRegisterClass) {
+      HasFPSaveArea = true;
+      
+      FPRegs.push_back(CSI[i]);
+      
+      if (Reg < MinFPR) {
+        MinFPR = Reg;
+      }
+// FIXME SVR4: Disable CR save area for now.
+    } else if (   RC == PPC::CRBITRCRegisterClass
+               || RC == PPC::CRRCRegisterClass) {
+//      HasCRSaveArea = true;
+    } else if (RC == PPC::VRSAVERCRegisterClass) {
+      HasVRSAVESaveArea = true;
+    } else if (RC == PPC::VRRCRegisterClass) {
+      HasVRSaveArea = true;
+      
+      VRegs.push_back(CSI[i]);
+      
+      if (Reg < MinVR) {
+        MinVR = Reg;
+      }
+    } else {
+      assert(0 && "Unknown RegisterClass!");
+    }
+  }
+
+  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+  
+  int64_t LowerBound = 0;
+
+  // Take into account stack space reserved for tail calls.
+  int TCSPDelta = 0;
+  if (PerformTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+    LowerBound = TCSPDelta;
+  }
+
+  // The Floating-point register save area is right below the back chain word
+  // of the previous stack frame.
+  if (HasFPSaveArea) {
+    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+      int FI = FPRegs[i].getFrameIdx();
+      
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+    
+    LowerBound -= (31 - getRegisterNumbering(MinFPR) + 1) * 8; 
+  }
+
+  // Check whether the frame pointer register is allocated. If so, make sure it
+  // is spilled to the correct offset.
+  if (needsFP(MF)) {
+    HasGPSaveArea = true;
+    
+    int FI = PFI->getFramePointerSaveIndex();
+    assert(FI && "No Frame Pointer Save Slot!");
+    
+    FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+  }
+  
+  // General register save area starts right below the Floating-point
+  // register save area.
+  if (HasGPSaveArea) {
+    // Move general register save area spill slots down, taking into account
+    // the size of the Floating-point register save area.
+    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+      int FI = GPRegs[i].getFrameIdx();
+      
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+    
+    LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4;
+  }
+  
+  // The CR save area is below the general register save area.
+  if (HasCRSaveArea) {
+    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+    //             which have the CR/CRBIT register class?
+    // Adjust the frame index of the CR spill slot.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      const TargetRegisterClass *RC = CSI[i].getRegClass();
+    
+      if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) {
+        int FI = CSI[i].getFrameIdx();
+
+        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+      }
+    }
+    
+    LowerBound -= 4; // The CR save area is always 4 bytes long.
+  }
+  
+  if (HasVRSAVESaveArea) {
+    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+    //             which have the VRSAVE register class?
+    // Adjust the frame index of the VRSAVE spill slot.
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      const TargetRegisterClass *RC = CSI[i].getRegClass();
+    
+      if (RC == PPC::VRSAVERCRegisterClass) {
+        int FI = CSI[i].getFrameIdx();
+
+        FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+      }
+    }
+    
+    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+  }
+  
+  if (HasVRSaveArea) {
+    // Insert alignment padding, we need 16-byte alignment.
+    LowerBound = (LowerBound - 15) & ~(15);
+    
+    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+      int FI = VRegs[i].getFrameIdx();
+      
+      FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+    }
+  }
+}
+
+void
 PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1033,15 +1197,26 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
   // Get processor type.
   bool IsPPC64 = Subtarget.isPPC64();
   // Get operating system
-  bool IsMachoABI = Subtarget.isMachoABI();
+  bool isDarwinABI = Subtarget.isDarwinABI();
   // Check if the link register (LR) must be saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF) && FrameSize;
   
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
-  int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+    }
+  }
 
   if (IsPPC64) {
     if (MustSaveLR)
@@ -1242,15 +1417,26 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
   // Get processor type.
   bool IsPPC64 = Subtarget.isPPC64();
   // Get operating system
-  bool IsMachoABI = Subtarget.isMachoABI();
+  bool isDarwinABI = Subtarget.isDarwinABI();
   // Check if the link register (LR) has been saved.
   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
   bool MustSaveLR = FI->mustSaveLR();
   // Do we have a frame pointer for this function?
   bool HasFP = hasFP(MF) && FrameSize;
   
-  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, IsMachoABI);
-  int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, IsMachoABI);
+  int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
+
+  int FPOffset = 0;
+  if (HasFP) {
+    if (Subtarget.isSVR4ABI()) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      int FPIndex = FI->getFramePointerSaveIndex();
+      assert(FPIndex && "No Frame Pointer Save Slot!");
+      FPOffset = FFI->getObjectOffset(FPIndex);
+    } else {
+      FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI);
+    }
+  }
   
   bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
     RetOpcode == PPC::TCRETURNdi ||
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 9506b65..ddaefdd 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,6 +75,8 @@ public:
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                             RegScavenger *RS = NULL) const;
+  void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
   void emitPrologue(MachineFunction &MF) const;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 9e15a55..bac8e3a 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -43,8 +43,9 @@ class VR<bits<5> num, string n> : PPCReg<n> {
 }
 
 // CR - One of the 8 4-bit condition registers
-class CR<bits<3> num, string n> : PPCReg<n> {
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
   field bits<3> Num = num;
+  let SubRegs = subregs;
 }
 
 // CRBIT - One of the 32 1-bit condition register fields
@@ -189,16 +190,6 @@ def V29 : VR<29, "v29">, DwarfRegNum<[106]>;
 def V30 : VR<30, "v30">, DwarfRegNum<[107]>;
 def V31 : VR<31, "v31">, DwarfRegNum<[108]>;
 
-// Condition registers
-def CR0 : CR<0, "cr0">, DwarfRegNum<[68]>;
-def CR1 : CR<1, "cr1">, DwarfRegNum<[69]>;
-def CR2 : CR<2, "cr2">, DwarfRegNum<[70]>;
-def CR3 : CR<3, "cr3">, DwarfRegNum<[71]>;
-def CR4 : CR<4, "cr4">, DwarfRegNum<[72]>;
-def CR5 : CR<5, "cr5">, DwarfRegNum<[73]>;
-def CR6 : CR<6, "cr6">, DwarfRegNum<[74]>;
-def CR7 : CR<7, "cr7">, DwarfRegNum<[75]>;
-
 // Condition register bits
 def CR0LT : CRBIT< 0, "0">, DwarfRegNum<[0]>;
 def CR0GT : CRBIT< 1, "1">, DwarfRegNum<[0]>;
@@ -233,6 +224,16 @@ def CR7GT : CRBIT<29, "29">, DwarfRegNum<[0]>;
 def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
 def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
 
+// Condition registers
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>;
+
 def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
                    [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
 def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
@@ -290,7 +291,12 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
       // On PPC64, r13 is the thread pointer.  Never allocate this register.
       // Note that this is overconservative, as it also prevents allocation of
       // R31 when the FP is not needed.
-      if (MF.getTarget().getSubtarget<PPCSubtarget>().isPPC64())
+      // When using the SVR4 ABI, r13 is reserved for the Small Data Area
+      // pointer.
+      const PPCSubtarget &Subtarget
+        = MF.getTarget().getSubtarget<PPCSubtarget>();
+         
+      if (Subtarget.isPPC64() || Subtarget.isSVR4ABI())
         return end()-5;  // don't allocate R13, R31, R0, R1, LR
         
       if (needsFP(MF))
@@ -324,22 +330,24 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
   }];
 }
 
-
-
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
 def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
-  F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
+  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
 def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
-  F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21,
-  F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+  F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
+  F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
 
 def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
  [V2, V3, V4, V5, V0, V1, 
-  V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V20, V21,
-  V22, V23, V24, V25, V26, V27, V28, V29, V30, V31]>;
-
-def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, 
-  CR3, CR4]>;
+  V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+  V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>;
 
 def CRBITRC : RegisterClass<"PPC", [i32], 32,
   [CR0LT, CR0GT, CR0EQ, CR0UN,
@@ -355,6 +363,13 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
   let CopyCost = -1;
 }
 
+def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, 
+  CR3, CR4]>
+{
+  let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC];
+}
 
 def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
 def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 176f3e1..f633cc6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -148,8 +148,8 @@ public:
   /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
   unsigned getDarwinVers() const { return DarwinVers; }
 
-  bool isMachoABI() const { return isDarwin() || IsPPC64; }
-  bool isELF32_ABI() const { return !isDarwin() && !IsPPC64; }
+  bool isDarwinABI() const { return isDarwin() || IsPPC64; }
+  bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; }
 
   unsigned getAsmFlavor() const {
     return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2f95d7e..e9073d6 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -152,7 +152,7 @@ bool PPCTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           raw_ostream &Out) {
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
 
   return false;
 }
@@ -183,7 +183,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -215,7 +215,7 @@ bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -230,7 +230,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -245,7 +245,7 @@ bool PPCTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 086d2f4..c693bf4 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -46,7 +46,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             PPCTargetMachine &tm, 
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index 688fb30..6e9e6c7 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -180,6 +180,16 @@ void bar() { struct foo R = { 1.0, 2.0 }; xxx(R); }
 
 ===-------------------------------------------------------------------------===
 
+Darwin Stub removal:
+
+We still generate calls to foo$stub, and stubs, on Darwin.  This is not
+necessary when building with the Leopard (10.5) or later linker, as stubs are
+generated by ld when necessary.  Parameterizing this based on the deployment
+target (-mmacosx-version-min) is probably enough.  x86-32 does this right, see
+its logic.
+
+===-------------------------------------------------------------------------===
+
 Darwin Stub LICM optimization:
 
 Loops like this:
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index cb23f62..71bd0de 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -50,9 +50,8 @@ namespace {
     unsigned BBNumber;
   public:
     explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
-                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                             bool V)
-      : AsmPrinter(O, TM, T, OL, V), BBNumber(0) {}
+                             const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V), BBNumber(0) {}
 
     virtual const char *getPassName() const {
       return "Sparc Assembly Printer";
@@ -84,9 +83,8 @@ namespace {
 ///
 FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
                                                TargetMachine &tm,
-                                               CodeGenOpt::Level OptLevel,
                                                bool verbose) {
-  return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 
@@ -109,7 +107,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Print out the label for the function.
   const Function *F = MF.getFunction();
   SwitchToSection(TAI->SectionForGlobal(F));
-  EmitAlignment(4, F);
+  EmitAlignment(MF.getAlignment(), F);
   O << "\t.globl\t" << CurrentFnName << '\n';
 
   printVisibility(CurrentFnName, F->getVisibility());
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index eefa7e8..eb045e2 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -21,3 +21,5 @@ add_llvm_target(SparcCodeGen
   SparcTargetAsmInfo.cpp
   SparcTargetMachine.cpp
   )
+
+target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index bb03f30..c7d0ca8 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -25,7 +25,6 @@ namespace llvm {
 
   FunctionPass *createSparcISelDag(SparcTargetMachine &TM);
   FunctionPass *createSparcCodePrinterPass(raw_ostream &OS, TargetMachine &TM,
-                                           CodeGenOpt::Level OptLevel,
                                            bool Verbose);
   FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
   FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 3ec7e06..4c3efde 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1047,3 +1047,8 @@ SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   // The Sparc target isn't yet aware of offsets.
   return false;
 }
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned SparcTargetLowering::getFunctionAlignment(const Function *) const {
+  return 4;
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index fe6811f..27ce1b7 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -73,6 +73,9 @@ namespace llvm {
                                       MVT VT) const;
 
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
   };
 } // end namespace llvm
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index d2f6b9b..12c286a 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -256,17 +256,20 @@ MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (OpNum == 0) { // COPY -> STORE
       unsigned SrcReg = MI->getOperand(1).getReg();
       bool isKill = MI->getOperand(1).isKill();
+      bool isUndef = MI->getOperand(1).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(),
                       get(isFloat ? SP::STFri : SP::STDFri))
         .addFrameIndex(FI)
         .addImm(0)
-        .addReg(SrcReg, getKillRegState(isKill));
+        .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
     } else {             // COPY -> LOAD
       unsigned DstReg = MI->getOperand(0).getReg();
       bool isDead = MI->getOperand(0).isDead();
+      bool isUndef = MI->getOperand(0).isUndef();
       NewMI = BuildMI(MF, MI->getDebugLoc(),
                       get(isFloat ? SP::LDFri : SP::LDDFri))
-        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addReg(DstReg, RegState::Define |
+                getDeadRegState(isDead) | getUndefRegState(isUndef))
         .addFrameIndex(FI)
         .addImm(0);
     }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index aef238d..1343bcc 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -90,6 +90,6 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 8afcc73..ee55d3c 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -39,7 +39,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             TargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
   
diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp
index 9651e65..3631b35 100644
--- a/lib/Target/TargetELFWriterInfo.cpp
+++ b/lib/Target/TargetELFWriterInfo.cpp
@@ -24,13 +24,3 @@ TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
 
 TargetELFWriterInfo::~TargetELFWriterInfo() {}
 
-/// getFunctionAlignment - Returns the alignment for function 'F', targets
-/// with different alignment constraints should overload this method
-unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const {
-  const TargetData *TD = TM.getTargetData();
-  unsigned FnAlign = F->getAlignment();
-  unsigned TDAlign = TD->getPointerABIAlignment();
-  unsigned Align = std::max(FnAlign, TDAlign);
-  assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
-  return Align;
-}
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index e75cfc5..127f228 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -154,21 +154,13 @@ void X86ATTAsmPrinter::decorateName(std::string &Name,
   }
 }
 
-
-
 void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+  unsigned FnAlign = MF.getAlignment();
   const Function *F = MF.getFunction();
 
   decorateName(CurrentFnName, F);
 
   SwitchToSection(TAI->SectionForGlobal(F));
-
-  // FIXME: A function's alignment should be part of MachineFunction.  There
-  // shouldn't be a policy decision here.
-  unsigned FnAlign = 4;
-  if (F->hasFnAttr(Attribute::OptimizeForSize))
-    FnAlign = 1;
-  
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:  // Symbols default to internal.
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index bd96115..f47daf1 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -38,9 +38,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   MCStreamer *Streamer;
  public:
   explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
-                            const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                            bool V)
-    : AsmPrinter(O, TM, T, OL, V) {
+                            const TargetAsmInfo *T, bool V)
+    : AsmPrinter(O, TM, T, V) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
     Context = 0;
     Streamer = 0;
@@ -140,6 +139,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printi128mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
+  void printi256mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
   void printf32mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
@@ -152,6 +154,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printf128mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
+  void printf256mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
   void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
     printLeaMemReference(MI, OpNo);
   }
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index d1623d6..e5d80a4 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -25,15 +25,12 @@ using namespace llvm;
 ///
 FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
                                              X86TargetMachine &tm,
-                                             CodeGenOpt::Level OptLevel,
                                              bool verbose) {
   const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
 
   if (Subtarget->isFlavorIntel())
-    return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
-                                  OptLevel, verbose);
-  return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
-                              OptLevel, verbose);
+    return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
+  return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 namespace {
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
index ceae7be..9d4df93 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -132,6 +132,7 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Print out labels for the function.
   const Function *F = MF.getFunction();
   unsigned CC = F->getCallingConv();
+  unsigned FnAlign = MF.getAlignment();
 
   // Populate function information map.  Actually, We don't want to populate
   // non-stdcall or non-fastcall functions' information right now.
@@ -141,10 +142,6 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   decorateName(CurrentFnName, F);
 
   SwitchToTextSection("_text", F);
-
-  unsigned FnAlign = 4;
-  if (F->hasFnAttr(Attribute::OptimizeForSize))
-    FnAlign = 1;
   switch (F->getLinkage()) {
   default: assert(0 && "Unsupported linkage type!");
   case Function::PrivateLinkage:
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 04f2595..a724c3c 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -26,9 +26,8 @@ namespace llvm {
 
 struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
   explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
-                              const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                              bool V)
-    : AsmPrinter(O, TM, T, OL, V) {}
+                              const TargetAsmInfo *T, bool V)
+    : AsmPrinter(O, TM, T, V) {}
 
   virtual const char *getPassName() const {
     return "X86 Intel-Style Assembly Printer";
@@ -76,6 +75,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
     O << "XMMWORD PTR ";
     printMemReference(MI, OpNo);
   }
+  void printi256mem(const MachineInstr *MI, unsigned OpNo) {
+    O << "YMMWORD PTR ";
+    printMemReference(MI, OpNo);
+  }
   void printf32mem(const MachineInstr *MI, unsigned OpNo) {
     O << "DWORD PTR ";
     printMemReference(MI, OpNo);
@@ -92,6 +95,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
     O << "XMMWORD PTR ";
     printMemReference(MI, OpNo);
   }
+  void printf256mem(const MachineInstr *MI, unsigned OpNo) {
+    O << "YMMWORD PTR ";
+    printMemReference(MI, OpNo);
+  }
   void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
     O << "DWORD PTR ";
     printLeaMemReference(MI, OpNo);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index d982990..7ea0e51 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -27,3 +27,5 @@ add_llvm_target(X86CodeGen
   X86TargetMachine.cpp
   X86FastISel.cpp
   )
+
+target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index fd13b02..22de3f6 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -46,9 +46,7 @@ FunctionPass *createX87FPRegKillInserterPass();
 /// assembly code for a MachineFunction to the given output stream,
 /// using the given target machine description.
 ///
-FunctionPass *createX86CodePrinterPass(raw_ostream &o,
-                                       X86TargetMachine &tm,
-                                       CodeGenOpt::Level OptLevel,
+FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm,
                                        bool Verbose);
 
 /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4d26364..47861d5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -119,6 +119,10 @@ def : Proc<"amdfam10",        [FeatureSSE3,   FeatureSSE4A,
                                Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"barcelona",       [FeatureSSE3,   FeatureSSE4A,
                                Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"istanbul",        [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+                               Feature3DNowA]>;
+def : Proc<"shanghai",        [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+                               Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureMMX]>;
 def : Proc<"winchip2",        [FeatureMMX, Feature3DNow]>;
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 315118f..912ab0e 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -59,18 +59,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
   return 0;
 }
 
-unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
-  unsigned FnAlign = 4;
-
-  if (F->hasFnAttr(Attribute::OptimizeForSize))
-    FnAlign = 1;
-
-  if (F->getAlignment())
-    FnAlign = Log2_32(F->getAlignment());
-
-  return (1 << FnAlign);
-}
-
 long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const {
   if (is64Bit) {
     switch(RelTy) {
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 96485b8..2ba1a0b 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -41,10 +41,6 @@ namespace llvm {
     X86ELFWriterInfo(TargetMachine &TM);
     virtual ~X86ELFWriterInfo();
 
-    /// getFunctionAlignment - Returns the alignment for function 'F', targets
-    /// with different alignment constraints should overload this method
-    virtual unsigned getFunctionAlignment(const Function *F) const;
-
     /// getRelocationType - Returns the target specific ELF Relocation type.
     /// 'MachineRelTy' contains the object code independent relocation type
     virtual unsigned getRelocationType(unsigned MachineRelTy) const;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 8a21b35..b336d78 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -452,25 +452,38 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
     if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
       // Check to see if we've already materialized this
       // value in a register in this block.
-      if (unsigned Reg = LocalValueMap[V]) {
-        AM.Base.Reg = Reg;
+      DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
+      if (I != LocalValueMap.end() && I->second != 0) {
+        AM.Base.Reg = I->second;
         AM.GV = 0;
         return true;
       }
-      // Issue load from stub if necessary.
+      
+      // Issue load from stub.
       unsigned Opc = 0;
       const TargetRegisterClass *RC = NULL;
+      X86AddressMode StubAM;
+      StubAM.Base.Reg = AM.Base.Reg;
+      StubAM.GV = AM.GV;
+      
       if (TLI.getPointerTy() == MVT::i32) {
         Opc = X86::MOV32rm;
         RC  = X86::GR32RegisterClass;
+        
+        if (Subtarget->isPICStyleGOT() &&
+            TM.getRelocationModel() == Reloc::PIC_)
+          StubAM.GVOpFlags = X86II::MO_GOT;
+        
       } else {
         Opc = X86::MOV64rm;
         RC  = X86::GR64RegisterClass;
+        
+        if (TM.getRelocationModel() != Reloc::Static) {
+          StubAM.GVOpFlags = X86II::MO_GOTPCREL;
+          StubAM.Base.Reg = X86::RIP;
+        }
       }
 
-      X86AddressMode StubAM;
-      StubAM.Base.Reg = AM.Base.Reg;
-      StubAM.GV = AM.GV;
       unsigned ResultReg = createResultReg(RC);
       addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
 
@@ -1503,7 +1516,9 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
     } else if (Subtarget->isPICStyleGOT()) {
       OpFlag = X86II::MO_GOTOFF;
       PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
-    }
+    } else if (Subtarget->isPICStyleRIPRel() &&
+               TM.getCodeModel() == CodeModel::Small)
+      PICBase = X86::RIP;
   }
 
   // Create the load from the constant pool.
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index ed4eb44..37027ee 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -990,16 +990,21 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   }
   case X86::FpSET_ST0_32:
   case X86::FpSET_ST0_64:
-  case X86::FpSET_ST0_80:
+  case X86::FpSET_ST0_80: {
+    unsigned Op0 = getFPReg(MI->getOperand(0));
+
     // FpSET_ST0_80 is generated by copyRegToReg for both function return
     // and inline assembly with the "st" constrain. In the latter case,
-    // it is possible for FP0 to be alive after this instruction.
-    if (!MI->killsRegister(X86::FP0)) {
-      // Duplicate ST0
-      duplicateToTop(0, 0, I);
+    // it is possible for ST(0) to be alive after this instruction.
+    if (!MI->killsRegister(X86::FP0 + Op0)) {
+      // Duplicate Op0
+      duplicateToTop(0, 7 /*temp register*/, I);
+    } else {
+      moveToTop(Op0, I);
     }
     --StackTop;   // "Forget" we have something on the top of stack!
     break;
+  }
   case X86::FpSET_ST1_32:
   case X86::FpSET_ST1_64:
   case X86::FpSET_ST1_80:
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9614e69..5a6294a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -700,6 +700,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       // Do not attempt to custom lower non-power-of-2 vectors
       if (!isPowerOf2_32(VT.getVectorNumElements()))
         continue;
+      // Do not attempt to custom lower non-128-bit vectors
+      if (!VT.is128BitVector())
+        continue;
       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -718,17 +721,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     }
 
     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
-    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
-      setOperationAction(ISD::AND,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::AND,    (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::OR,     (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::OR,     (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::XOR,    (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::XOR,    (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::LOAD,   (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::LOAD,   (MVT::SimpleValueType)VT, MVT::v2i64);
-      setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
-      AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      // Do not attempt to promote non-128-bit vectors
+      if (!VT.is128BitVector()) {
+        continue;
+      }
+      setOperationAction(ISD::AND,    VT, Promote);
+      AddPromotedToType (ISD::AND,    VT, MVT::v2i64);
+      setOperationAction(ISD::OR,     VT, Promote);
+      AddPromotedToType (ISD::OR,     VT, MVT::v2i64);
+      setOperationAction(ISD::XOR,    VT, Promote);
+      AddPromotedToType (ISD::XOR,    VT, MVT::v2i64);
+      setOperationAction(ISD::LOAD,   VT, Promote);
+      AddPromotedToType (ISD::LOAD,   VT, MVT::v2i64);
+      setOperationAction(ISD::SELECT, VT, Promote);
+      AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
     }
 
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -775,6 +784,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
   }
 
+  if (!UseSoftFloat && Subtarget->hasAVX()) {
+    addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+    addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+
+    setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
+    setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
+    setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
+    //setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
+    //setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8f32, Custom);
+    //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
+    //setOperationAction(ISD::SELECT,             MVT::v8f32, Custom);
+    //setOperationAction(ISD::VSETCC,             MVT::v8f32, Custom);
+
+    // Operations to consider commented out -v16i16 v32i8
+    //setOperationAction(ISD::ADD,                MVT::v16i16, Legal);
+    setOperationAction(ISD::ADD,                MVT::v8i32, Custom);
+    setOperationAction(ISD::ADD,                MVT::v4i64, Custom);
+    //setOperationAction(ISD::SUB,                MVT::v32i8, Legal);
+    //setOperationAction(ISD::SUB,                MVT::v16i16, Legal);
+    setOperationAction(ISD::SUB,                MVT::v8i32, Custom);
+    setOperationAction(ISD::SUB,                MVT::v4i64, Custom);
+    //setOperationAction(ISD::MUL,                MVT::v16i16, Legal);
+    setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
+    setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
+    setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);
+
+    setOperationAction(ISD::VSETCC,             MVT::v4f64, Custom);
+    // setOperationAction(ISD::VSETCC,             MVT::v32i8, Custom);
+    // setOperationAction(ISD::VSETCC,             MVT::v16i16, Custom);
+    setOperationAction(ISD::VSETCC,             MVT::v8i32, Custom);
+
+    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i8, Custom);
+    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i16, Custom);
+    // setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i16, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i32, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8f32, Custom);
+
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f64, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i64, Custom);
+    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f64, Custom);
+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
+
+#if 0
+    // Not sure we want to do this since there are no 256-bit integer
+    // operations in AVX
+
+    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+    // This includes 256-bit vectors
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      // Do not attempt to custom lower non-power-of-2 vectors
+      if (!isPowerOf2_32(VT.getVectorNumElements()))
+        continue;
+
+      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
+      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+    }
+
+    if (Subtarget->is64Bit()) {
+      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i64, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+    }    
+#endif
+
+#if 0
+    // Not sure we want to do this since there are no 256-bit integer
+    // operations in AVX
+
+    // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
+    // Including 256-bit vectors
+    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
+      MVT VT = (MVT::SimpleValueType)i;
+
+      if (!VT.is256BitVector()) {
+        continue;
+      }
+      setOperationAction(ISD::AND,    VT, Promote);
+      AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
+      setOperationAction(ISD::OR,     VT, Promote);
+      AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
+      setOperationAction(ISD::XOR,    VT, Promote);
+      AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
+      setOperationAction(ISD::LOAD,   VT, Promote);
+      AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
+      setOperationAction(ISD::SELECT, VT, Promote);
+      AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+    }
+
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+#endif
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
@@ -805,6 +922,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::STORE);
+  setTargetDAGCombine(ISD::MEMBARRIER);
   if (Subtarget->is64Bit())
     setTargetDAGCombine(ISD::MUL);
 
@@ -909,6 +1027,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
   return Table;
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
+  return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+}
+
 //===----------------------------------------------------------------------===//
 //               Return Value Calling Convention Implementation
 //===----------------------------------------------------------------------===//
@@ -5690,7 +5813,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
       Args.push_back(Entry);
       std::pair<SDValue,SDValue> CallResult =
         LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
-                    CallingConv::C, false,
+                    0, CallingConv::C, false,
                     DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
       return CallResult.second;
     }
@@ -8454,6 +8577,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+// On X86 and X86-64, atomic operations are lowered to locked instructions.
+// Locked instructions, in turn, have implicit fence semantics (all memory
+// operations are flushed before issuing the locked instruction, and the
+// are not buffered), so we can fold away the common pattern of 
+// fence-atomic-fence.
+static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
+  SDValue atomic = N->getOperand(0);
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      break;
+    default:
+      return SDValue();
+  }
+  
+  SDValue fence = atomic.getOperand(0);
+  if (fence.getOpcode() != ISD::MEMBARRIER)
+    return SDValue();
+  
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2),
+                                    atomic.getOperand(3));
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+                                    atomic.getOperand(1), atomic.getOperand(2));
+    default:
+      return SDValue();
+  }
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -8472,6 +8647,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
+  case ISD::MEMBARRIER:     return PerformMEMBARRIERCombine(N, DAG);
   }
 
   return SDValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index fb4eb68..ffed46c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -380,7 +380,7 @@ namespace llvm {
     MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
                             bool isSrcConst, bool isSrcStr,
                             SelectionDAG &DAG) const;
-    
+
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
     virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@@ -533,7 +533,10 @@ namespace llvm {
                    , SmallSet<Instruction*, 8> &
 #endif
                    );
-    
+
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
   private:
     /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
     /// make the right decision when generating code for different targets.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index b50dd65..6359542 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -49,8 +49,10 @@ struct X86AddressMode {
   unsigned IndexReg;
   unsigned Disp;
   GlobalValue *GV;
+  unsigned GVOpFlags;
 
-  X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) {
+  X86AddressMode()
+    : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
     Base.Reg = 0;
   }
 };
@@ -113,7 +115,7 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
     assert (0);
   MIB.addImm(AM.Scale).addReg(AM.IndexReg);
   if (AM.GV)
-    return MIB.addGlobalAddress(AM.GV, AM.Disp);
+    return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
   else
     return MIB.addImm(AM.Disp);
 }
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 21f71ec..e5d84c5 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2459,7 +2459,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                getDefRegState(MO.isDef()) |
                RegState::Implicit |
                getKillRegState(MO.isKill()) |
-               getDeadRegState(MO.isDead()));
+               getDeadRegState(MO.isDead()) |
+               getUndefRegState(MO.isUndef()));
   }
   // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
   unsigned NewOpc = 0;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a6b0880..03df10d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -180,10 +180,12 @@ def i16mem  : X86MemOperand<"printi16mem">;
 def i32mem  : X86MemOperand<"printi32mem">;
 def i64mem  : X86MemOperand<"printi64mem">;
 def i128mem : X86MemOperand<"printi128mem">;
+def i256mem : X86MemOperand<"printi256mem">;
 def f32mem  : X86MemOperand<"printf32mem">;
 def f64mem  : X86MemOperand<"printf64mem">;
 def f80mem  : X86MemOperand<"printf80mem">;
 def f128mem : X86MemOperand<"printf128mem">;
+def f256mem : X86MemOperand<"printf256mem">;
 
 // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
 // plain GR64, so that it doesn't potentially require a REX prefix.
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 996baa0..2e6f017 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -157,6 +157,24 @@ let Namespace = "X86" in {
   def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
   def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
 
+  // YMM Registers, used by AVX instructions
+  def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
+  def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
+  def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
+  def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
+  def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
+  def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
+  def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
+  def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
+  def YMM8:  Register<"ymm8">,  DwarfRegNum<[25, -2, -2]>;
+  def YMM9:  Register<"ymm9">,  DwarfRegNum<[26, -2, -2]>;
+  def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
+  def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
+  def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
+  def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
+  def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
+  def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+
   // Floating point stack registers
   def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
   def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
@@ -229,6 +247,11 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                    [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, 
                     R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
 
+def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,  
+                    YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+                   [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, 
+                    XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
 //===----------------------------------------------------------------------===//
 // Register Class Definitions... now that we have all of the pieces, define the
 // top-level register classes.  The order specified in the register list is
@@ -755,6 +778,10 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
     }
   }];
 }
+def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
+                          [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+                           YMM8, YMM9, YMM10, YMM11,
+                           YMM12, YMM13, YMM14, YMM15]>;
 
 // Status flags registers.
 def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index f4f6cce..0d1434f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -142,7 +142,7 @@ public:
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
-  bool hasAVX() const { return hasAVX(); }
+  bool hasAVX() const { return HasAVX; }
   bool hasFMA3() const { return HasFMA3; }
   bool hasFMA4() const { return HasFMA4; }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 67dcd01..b000914 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -226,7 +226,7 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
 
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
-    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+    PM.add(AsmPrinterCtor(Out, *this, Verbose));
   return false;
 }
 
@@ -254,7 +254,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -284,7 +284,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -298,7 +298,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
@@ -312,7 +312,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   if (DumpAsm) {
     assert(AsmPrinterCtor && "AsmPrinter was not linked in");
     if (AsmPrinterCtor)
-      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+      PM.add(AsmPrinterCtor(errs(), *this, true));
   }
 
   return false;
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index ba73ca8..90a5cc2 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -45,7 +45,6 @@ protected:
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
                                             X86TargetMachine &tm,
-                                            CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 5722b87..d95aab3 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -26,7 +26,6 @@ namespace llvm {
   FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
   FunctionPass *createXCoreCodePrinterPass(raw_ostream &OS,
                                            XCoreTargetMachine &TM,
-                                           CodeGenOpt::Level OptLevel,
                                            bool Verbose);
 } // end namespace llvm;
 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 4ab5d75..67cb0c8 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -58,9 +58,8 @@ namespace {
     const XCoreSubtarget &Subtarget;
   public:
     explicit XCoreAsmPrinter(raw_ostream &O, XCoreTargetMachine &TM,
-                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
-                             bool V)
-      : AsmPrinter(O, TM, T, OL, V), DW(0),
+                             const TargetAsmInfo *T, bool V)
+      : AsmPrinter(O, TM, T, V), DW(0),
         Subtarget(*TM.getSubtargetImpl()) {}
 
     virtual const char *getPassName() const {
@@ -106,9 +105,8 @@ namespace {
 ///
 FunctionPass *llvm::createXCoreCodePrinterPass(raw_ostream &o,
                                                XCoreTargetMachine &tm,
-                                               CodeGenOpt::Level OptLevel,
                                                bool verbose) {
-  return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
+  return new XCoreAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
 }
 
 // PrintEscapedString - Print each character of the specified string, escaping
@@ -277,7 +275,7 @@ emitFunctionStart(MachineFunction &MF)
     break;
   }
   // (1 << 1) byte aligned
-  EmitAlignment(1, F, 1);
+  EmitAlignment(MF.getAlignment(), F, 1);
   if (TAI->hasDotTypeDotSizeDirective()) {
     O << "\t.type " << CurrentFnName << ",@function\n";
   }
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 93c5f59..cc11d32 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -187,6 +187,12 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
   }
 }
 
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned XCoreTargetLowering::
+getFunctionAlignment(const Function *) const {
+  return 1;
+}
+
 //===----------------------------------------------------------------------===//
 //  Misc Lower Operation implementation
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 993ecbd..753ea81 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -84,6 +84,9 @@ namespace llvm {
     virtual bool isLegalAddressingMode(const AddrMode &AM,
                                        const Type *Ty) const;
 
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
   private:
     const XCoreTargetMachine &TM;
     const XCoreSubtarget &Subtarget;
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 09227d9..b72225f 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -69,6 +69,6 @@ bool XCoreTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                             bool Verbose,
                                             raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createXCoreCodePrinterPass(Out, *this, OptLevel, Verbose));
+  PM.add(createXCoreCodePrinterPass(Out, *this, Verbose));
   return false;
 }