Diffstat (limited to 'lib/Target/ARM')
41 files changed, 17955 insertions, 0 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
new file mode 100644
index 0000000..ac7de91
--- /dev/null
+++ b/lib/Target/ARM/ARM.h
@@ -0,0 +1,121 @@
+//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMTargetMachine;
+class FunctionPass;
+class MachineCodeEmitter;
+class JITCodeEmitter;
+class raw_ostream;
+
+// Enums corresponding to ARM condition codes
+namespace ARMCC {
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
+  enum CondCodes {
+    EQ,
+    NE,
+    HS,
+    LO,
+    MI,
+    PL,
+    VS,
+    VC,
+    HI,
+    LS,
+    GE,
+    LT,
+    GT,
+    LE,
+    AL
+  };
+
+  inline static CondCodes getOppositeCondition(CondCodes CC){
+    switch (CC) {
+    default: assert(0 && "Unknown condition code");
+    case EQ: return NE;
+    case NE: return EQ;
+    case HS: return LO;
+    case LO: return HS;
+    case MI: return PL;
+    case PL: return MI;
+    case VS: return VC;
+    case VC: return VS;
+    case HI: return LS;
+    case LS: return HI;
+    case GE: return LT;
+    case LT: return GE;
+    case GT: return LE;
+    case LE: return GT;
+    }
+  }
+}
+
+inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: assert(0 && "Unknown condition code");
+  case ARMCC::EQ:  return "eq";
+  case ARMCC::NE:  return "ne";
+  case ARMCC::HS:  return "hs";
+  case ARMCC::LO:  return "lo";
+  case ARMCC::MI:  return "mi";
+  case ARMCC::PL:  return "pl";
+  case ARMCC::VS:  return "vs";
+  case ARMCC::VC:  return "vc";
+  case ARMCC::HI:  return "hi";
+  case ARMCC::LS:  return "ls";
+  case ARMCC::GE:  return "ge";
+  case ARMCC::LT:  return "lt";
+  case ARMCC::GT:  return "gt";
+  case ARMCC::LE:  return "le";
+  case ARMCC::AL:  return "al";
+  }
+}
+
+FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMCodePrinterPass(raw_ostream &O,
+                                       ARMTargetMachine &TM,
+                                       CodeGenOpt::Level OptLevel,
+                                       bool Verbose);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+                                       MachineCodeEmitter &MCE);
+
+FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
+                                        MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
+                                           JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMConstantIslandPass();
+
+} // end namespace llvm;
+
+// Defines symbolic names for ARM registers.  This defines a mapping from
+// register name to register number.
+//
+#include "ARMGenRegisterNames.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#include "ARMGenInstrNames.inc"
+
+
+#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
new file mode 100644
index 0000000..4ac6857
--- /dev/null
+++ b/lib/Target/ARM/ARM.td
@@ -0,0 +1,136 @@
+//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// ARM Subtarget features. +// + +def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A">; +def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", + "Enable VFP2 instructions">; +def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", + "Enable VFP3 instructions">; +def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", + "Enable NEON instructions">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", + "Enable Thumb2 instructions">; + +//===----------------------------------------------------------------------===// +// ARM Processors supported. +// + +class Proc<string Name, list<SubtargetFeature> Features> + : Processor<Name, NoItineraries, Features>; + +// V4 Processors. +def : Proc<"generic", []>; +def : Proc<"arm8", []>; +def : Proc<"arm810", []>; +def : Proc<"strongarm", []>; +def : Proc<"strongarm110", []>; +def : Proc<"strongarm1100", []>; +def : Proc<"strongarm1110", []>; + +// V4T Processors. +def : Proc<"arm7tdmi", [ArchV4T]>; +def : Proc<"arm7tdmi-s", [ArchV4T]>; +def : Proc<"arm710t", [ArchV4T]>; +def : Proc<"arm720t", [ArchV4T]>; +def : Proc<"arm9", [ArchV4T]>; +def : Proc<"arm9tdmi", [ArchV4T]>; +def : Proc<"arm920", [ArchV4T]>; +def : Proc<"arm920t", [ArchV4T]>; +def : Proc<"arm922t", [ArchV4T]>; +def : Proc<"arm940t", [ArchV4T]>; +def : Proc<"ep9312", [ArchV4T]>; + +// V5T Processors. +def : Proc<"arm10tdmi", [ArchV5T]>; +def : Proc<"arm1020t", [ArchV5T]>; + +// V5TE Processors. +def : Proc<"arm9e", [ArchV5TE]>; +def : Proc<"arm926ej-s", [ArchV5TE]>; +def : Proc<"arm946e-s", [ArchV5TE]>; +def : Proc<"arm966e-s", [ArchV5TE]>; +def : Proc<"arm968e-s", [ArchV5TE]>; +def : Proc<"arm10e", [ArchV5TE]>; +def : Proc<"arm1020e", [ArchV5TE]>; +def : Proc<"arm1022e", [ArchV5TE]>; +def : Proc<"xscale", [ArchV5TE]>; +def : Proc<"iwmmxt", [ArchV5TE]>; + +// V6 Processors. 
+def : Proc<"arm1136j-s", [ArchV6]>; +def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"arm1176jz-s", [ArchV6]>; +def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; +def : Proc<"mpcorenovfp", [ArchV6]>; +def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; + +def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>; +def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>; + +def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "ARMRegisterInfo.td" + +include "ARMCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" + +def ARMInstrInfo : InstrInfo { + // Define how we want to layout our target-specific information field. + let TSFlagsFields = ["AddrModeBits", + "SizeFlag", + "IndexModeBits", + "isUnaryDataProc", + "Form"]; + let TSFlagsShifts = [0, + 4, + 7, + 9, + 10]; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARM : Target { + // Pull in Instruction Info: + let InstructionSet = ARMInstrInfo; +} diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h new file mode 100644 index 0000000..6d9b9ee --- /dev/null +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -0,0 +1,394 @@ +//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM addressing mode implementation stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +namespace llvm { + +/// ARM_AM - ARM Addressing Mode Stuff +namespace ARM_AM { + enum ShiftOpc { + no_shift = 0, + asr, + lsl, + lsr, + ror, + rrx + }; + + enum AddrOpc { + add = '+', sub = '-' + }; + + static inline const char *getShiftOpcStr(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return "asr"; + case ARM_AM::lsl: return "lsl"; + case ARM_AM::lsr: return "lsr"; + case ARM_AM::ror: return "ror"; + case ARM_AM::rrx: return "rrx"; + } + } + + static inline ShiftOpc getShiftOpcForNode(SDValue N) { + switch (N.getOpcode()) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. + // Can't handle RRX here, because it would require folding a flag into + // the addressing mode. :( This causes us to miss certain things. 
+ //case ARMISD::RRX: return ARM_AM::rrx; + } + } + + enum AMSubMode { + bad_am_submode = 0, + ia, + ib, + da, + db + }; + + static inline const char *getAMSubModeStr(AMSubMode Mode) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return "ia"; + case ARM_AM::ib: return "ib"; + case ARM_AM::da: return "da"; + case ARM_AM::db: return "db"; + } + } + + static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::ia: return isLD ? "fd" : "ea"; + case ARM_AM::ib: return isLD ? "ed" : "fa"; + case ARM_AM::da: return isLD ? "fa" : "ed"; + case ARM_AM::db: return isLD ? "ea" : "fd"; + } + } + + /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. + /// + static inline unsigned rotr32(unsigned Val, unsigned Amt) { + assert(Amt < 32 && "Invalid rotate amount"); + return (Val >> Amt) | (Val << ((32-Amt)&31)); + } + + /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. + /// + static inline unsigned rotl32(unsigned Val, unsigned Amt) { + assert(Amt < 32 && "Invalid rotate amount"); + return (Val << Amt) | (Val >> ((32-Amt)&31)); + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #1: shift_operand with registers + //===--------------------------------------------------------------------===// + // + // This 'addressing mode' is used for arithmetic instructions. It can + // represent things like: + // reg + // reg [asr|lsl|lsr|ror|rrx] reg + // reg [asr|lsl|lsr|ror|rrx] imm + // + // This is stored three operands [rega, regb, opc]. The first is the base + // reg, the second is the shift amount (or reg0 if not present or imm). The + // third operand encodes the shift opcode and the imm if a reg isn't present. + // + static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { + return ShOp | (Imm << 3); + } + static inline unsigned getSORegOffset(unsigned Op) { + return Op >> 3; + } + static inline ShiftOpc getSORegShOp(unsigned Op) { + return (ShiftOpc)(Op & 7); + } + + /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return + /// the 8-bit imm value. + static inline unsigned getSOImmValImm(unsigned Imm) { + return Imm & 0xFF; + } + /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return + /// the rotate amount. + static inline unsigned getSOImmValRot(unsigned Imm) { + return (Imm >> 8) * 2; + } + + /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand, + /// computing the rotate amount to use. If this immediate value cannot be + /// handled with a single shifter-op, determine a good rotate amount that will + /// take a maximal chunk of bits out of the immediate. + static inline unsigned getSOImmValRotate(unsigned Imm) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((Imm & ~255U) == 0) return 0; + + // Use CTZ to compute the rotate amount. + unsigned TZ = CountTrailingZeros_32(Imm); + + // Rotate amount must be even. Something like 0x200 must be rotated 8 bits, + // not 9. + unsigned RotAmt = TZ & ~1; + + // If we can handle this spread, return it. + if ((rotr32(Imm, RotAmt) & ~255U) == 0) + return (32-RotAmt)&31; // HW rotates right, not left. + + // For values like 0xF000000F, we should skip the first run of ones, then + // retry the hunt. 
+ if (Imm & 1) { + unsigned TrailingOnes = CountTrailingZeros_32(~Imm); + if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF + // Restart the search for a high-order bit after the initial seconds of + // ones. + unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); + + // Rotate amount must be even. + unsigned RotAmt2 = TZ2 & ~1; + + // If this fits, use it. + if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. + } + } + + // Otherwise, we have no way to cover this span of bits with a single + // shifter_op immediate. Return a chunk of bits that will be useful to + // handle. + return (32-RotAmt)&31; // HW rotates right, not left. + } + + /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit + /// into an shifter_operand immediate operand, return the 12-bit encoding for + /// it. If not, return -1. + static inline int getSOImmVal(unsigned Arg) { + // 8-bit (or less) immediates are trivially shifter_operands with a rotate + // of zero. + if ((Arg & ~255U) == 0) return Arg; + + unsigned RotAmt = getSOImmValRotate(Arg); + + // If this cannot be handled with a single shifter_op, bail out. + if (rotr32(~255U, RotAmt) & Arg) + return -1; + + // Encode this correctly. + return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8); + } + + /// isSOImmTwoPartVal - Return true if the specified value can be obtained by + /// or'ing together two SOImmVal's. + static inline bool isSOImmTwoPartVal(unsigned V) { + // If this can be handled with a single shifter_op, bail out. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled with two shifter_op's, accept. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + return V == 0; + } + + /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, + /// return the first chunk of it. + static inline unsigned getSOImmTwoPartFirst(unsigned V) { + return rotr32(255U, getSOImmValRotate(V)) & V; + } + + /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, + /// return the second chunk of it. + static inline unsigned getSOImmTwoPartSecond(unsigned V) { + // Mask out the first hunk. + V = rotr32(~255U, getSOImmValRotate(V)) & V; + + // Take what's left. + assert(V == (rotr32(255U, getSOImmValRotate(V)) & V)); + return V; + } + + /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed + /// by a left shift. Returns the shift amount to use. + static inline unsigned getThumbImmValShift(unsigned Imm) { + // 8-bit (or less) immediates are trivially immediate operand with a shift + // of zero. + if ((Imm & ~255U) == 0) return 0; + + // Use CTZ to compute the shift amount. + return CountTrailingZeros_32(Imm); + } + + /// isThumbImmShiftedVal - Return true if the specified value can be obtained + /// by left shifting a 8-bit immediate. + static inline bool isThumbImmShiftedVal(unsigned V) { + // If this can be handled with + V = (~255U << getThumbImmValShift(V)) & V; + return V == 0; + } + + /// getThumbImmNonShiftedVal - If V is a value that satisfies + /// isThumbImmShiftedVal, return the non-shiftd value. + static inline unsigned getThumbImmNonShiftedVal(unsigned V) { + return V >> getThumbImmValShift(V); + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #2 + //===--------------------------------------------------------------------===// + // + // This is used for most simple load/store instructions. 
+ // + // addrmode2 := reg +/- reg shop imm + // addrmode2 := reg +/- imm12 + // + // The first operand is always a Reg. The second operand is a reg if in + // reg/reg form, otherwise it's reg#0. The third field encodes the operation + // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. + // + // If this addressing mode is a frame index (before prolog/epilog insertion + // and code rewriting), this operand will have the form: FI#, reg0, <offs> + // with no shift amount for the frame offset. + // + static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { + assert(Imm12 < (1 << 12) && "Imm too large!"); + bool isSub = Opc == sub; + return Imm12 | ((int)isSub << 12) | (SO << 13); + } + static inline unsigned getAM2Offset(unsigned AM2Opc) { + return AM2Opc & ((1 << 12)-1); + } + static inline AddrOpc getAM2Op(unsigned AM2Opc) { + return ((AM2Opc >> 12) & 1) ? sub : add; + } + static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { + return (ShiftOpc)(AM2Opc >> 13); + } + + + //===--------------------------------------------------------------------===// + // Addressing Mode #3 + //===--------------------------------------------------------------------===// + // + // This is used for sign-extending loads, and load/store-pair instructions. + // + // addrmode3 := reg +/- reg + // addrmode3 := reg +/- imm8 + // + // The first operand is always a Reg. The second operand is a reg if in + // reg/reg form, otherwise it's reg#0. The third field encodes the operation + // in bit 8, the immediate in bits 0-7. + + /// getAM3Opc - This function encodes the addrmode3 opc field. + static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM3Offset(unsigned AM3Opc) { + return AM3Opc & 0xFF; + } + static inline AddrOpc getAM3Op(unsigned AM3Opc) { + return ((AM3Opc >> 8) & 1) ? sub : add; + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #4 + //===--------------------------------------------------------------------===// + // + // This is used for load / store multiple instructions. + // + // addrmode4 := reg, <mode> + // + // The four modes are: + // IA - Increment after + // IB - Increment before + // DA - Decrement after + // DB - Decrement before + // + // If the 4th bit (writeback)is set, then the base register is updated after + // the memory transfer. + + static inline AMSubMode getAM4SubMode(unsigned Mode) { + return (AMSubMode)(Mode & 0x7); + } + + static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { + return (int)SubMode | ((int)WB << 3); + } + + static inline bool getAM4WBFlag(unsigned Mode) { + return (Mode >> 3) & 1; + } + + //===--------------------------------------------------------------------===// + // Addressing Mode #5 + //===--------------------------------------------------------------------===// + // + // This is used for coprocessor instructions, such as FP load/stores. + // + // addrmode5 := reg +/- imm8*4 + // + // The first operand is always a Reg. The third field encodes the operation + // in bit 8, the immediate in bits 0-7. + // + // This can also be used for FP load/store multiple ops. The third field encodes + // writeback mode in bit 8, the number of registers (or 2 times the number of + // registers for DPR ops) in bits 0-7. 
In addition, bit 9-11 encodes one of the + // following two sub-modes: + // + // IA - Increment after + // DB - Decrement before + + /// getAM5Opc - This function encodes the addrmode5 opc field. + static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { + bool isSub = Opc == sub; + return ((int)isSub << 8) | Offset; + } + static inline unsigned char getAM5Offset(unsigned AM5Opc) { + return AM5Opc & 0xFF; + } + static inline AddrOpc getAM5Op(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1) ? sub : add; + } + + /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and + /// FSTM instructions. + static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, + unsigned char Offset) { + assert((SubMode == ia || SubMode == db) && + "Illegal addressing mode 5 sub-mode!"); + return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + } + static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { + return (AMSubMode)((AM5Opc >> 9) & 0x7); + } + static inline bool getAM5WBFlag(unsigned AM5Opc) { + return ((AM5Opc >> 8) & 1); + } + +} // end namespace ARM_AM +} // end namespace llvm + +#endif + diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h new file mode 100644 index 0000000..3b38375 --- /dev/null +++ b/lib/Target/ARM/ARMBuildAttrs.h @@ -0,0 +1,64 @@ +//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains enumerations and support routines for ARM build attributes +// as defined in ARM ABI addenda document (ABI release 2.07). +// +//===----------------------------------------------------------------------===// + +#ifndef __TARGET_ARMBUILDATTRS_H__ +#define __TARGET_ARMBUILDATTRS_H__ + +namespace ARMBuildAttrs { + enum { + File = 1, + Section = 2, + Symbol = 3, + CPU_raw_name = 4, + CPU_name = 5, + CPU_arch = 6, + CPU_arch_profile = 7, + ARM_ISA_use = 8, + THUMB_ISA_use = 9, + VFP_arch = 10, + WMMX_arch = 11, + Advanced_SIMD_arch = 12, + PCS_config = 13, + ABI_PCS_R9_use = 14, + ABI_PCS_RW_data = 15, + ABI_PCS_RO_data = 16, + ABI_PCS_GOT_use = 17, + ABI_PCS_wchar_t = 18, + ABI_FP_rounding = 19, + ABI_FP_denormal = 20, + ABI_FP_exceptions = 21, + ABI_FP_user_exceptions = 22, + ABI_FP_number_model = 23, + ABI_align8_needed = 24, + ABI_align8_preserved = 25, + ABI_enum_size = 26, + ABI_HardFP_use = 27, + ABI_VFP_args = 28, + ABI_WMMX_args = 29, + ABI_optimization_goals = 30, + ABI_FP_optimization_goals = 31, + compatibility = 32, + CPU_unaligned_access = 34, + VFP_HP_extension = 36, + ABI_FP_16bit_format = 38, + nodefaults = 64, + also_compatible_with = 65, + T2EE_use = 66, + conformance = 67, + Virtualization_use = 68, + MPextension_use = 70 + }; +} + +#endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td new file mode 100644 index 0000000..6cd786e --- /dev/null +++ b/lib/Target/ARM/ARMCallingConv.td @@ -0,0 +1,87 @@ +//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for ARM architecture. +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. +class CCIfSubtarget<string F, CCAction A>: + CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>; + +/// CCIfAlign - Match of the original alignment of the arg +class CCIfAlign<string Align, CCAction A>: + CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; + +//===----------------------------------------------------------------------===// +// ARM APCS Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_APCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // f64 is passed in pairs of GPRs, possibly split onto the stack + CCIfType<[f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 4>> +]>; + +def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// +def CC_ARM_AAPCS : CallingConv<[ + + CCIfType<[i8, i16], CCPromoteToType<i32>>, + + // i64/f64 is passed in even pairs of GPRs + // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register + // (and the same is true for f64 if VFP is not enabled) + CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, + CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" + "ArgFlags.getOrigAlign() != 8", + CCAssignToReg<[R0, R1, R2, R3]>>>, + + CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ + CCIfType<[f32], CCBitConvertToType<i32>>, + CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM Calling Convention Dispatch +//===----------------------------------------------------------------------===// + +def CC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, + CCDelegateTo<CC_ARM_APCS> +]>; + +def RetCC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, + CCDelegateTo<RetCC_ARM_APCS> +]>; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp new file mode 100644 index 0000000..44fac12 --- /dev/null +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -0,0 +1,1411 @@ +//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the pass that transforms the ARM machine instructions into +// relocatable machine code. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMInstrInfo.h" +#include "ARMRelocations.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#ifndef NDEBUG +#include <iomanip> +#endif +using namespace llvm; + +STATISTIC(NumEmitted, "Number of machine instructions emitted"); + +namespace { + + class ARMCodeEmitter { + public: + /// getBinaryCodeForInstr - This function, generated by the + /// CodeEmitterGenerator using TableGen, produces the binary encoding for + /// machine instructions. + unsigned getBinaryCodeForInstr(const MachineInstr &MI); + }; + + template<class CodeEmitter> + class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass, + public ARMCodeEmitter { + ARMJITInfo *JTI; + const ARMInstrInfo *II; + const TargetData *TD; + TargetMachine &TM; + CodeEmitter &MCE; + const std::vector<MachineConstantPoolEntry> *MCPEs; + const std::vector<MachineJumpTableEntry> *MJTEs; + bool IsPIC; + + public: + static char ID; + explicit Emitter(TargetMachine &tm, CodeEmitter &mce) + : MachineFunctionPass(&ID), JTI(0), II(0), TD(0), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + Emitter(TargetMachine &tm, CodeEmitter &mce, + const ARMInstrInfo &ii, const TargetData &td) + : MachineFunctionPass(&ID), JTI(0), II(&ii), TD(&td), TM(tm), + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + + bool runOnMachineFunction(MachineFunction &MF); + + virtual const char *getPassName() const { + return "ARM Machine Code Emitter"; + } + + void emitInstruction(const MachineInstr &MI); + + private: + + void emitWordLE(unsigned Binary); + + void emitDWordLE(uint64_t Binary); + + void emitConstPoolInstruction(const MachineInstr &MI); + + void emitMOVi2piecesInstruction(const MachineInstr &MI); + + void emitLEApcrelJTInstruction(const MachineInstr &MI); + + void emitPseudoMoveInstruction(const MachineInstr &MI); + + void addPCLabel(unsigned LabelID); + + void emitPseudoInstruction(const MachineInstr &MI); + + unsigned getMachineSoRegOpValue(const MachineInstr &MI, + const TargetInstrDesc &TID, + const MachineOperand &MO, + unsigned OpIdx); + + unsigned getMachineSoImmOpValue(unsigned SoImm); + + unsigned getAddrModeSBit(const MachineInstr &MI, + const TargetInstrDesc &TID) const; + + void emitDataProcessingInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRd = 0, + unsigned ImplicitRn = 0); + + void emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn = 0); + + void emitLoadStoreMultipleInstruction(const 
MachineInstr &MI); + + void emitMulFrmInstruction(const MachineInstr &MI); + + void emitExtendInstruction(const MachineInstr &MI); + + void emitMiscArithInstruction(const MachineInstr &MI); + + void emitBranchInstruction(const MachineInstr &MI); + + void emitInlineJumpTable(unsigned JTIndex); + + void emitMiscBranchInstruction(const MachineInstr &MI); + + void emitVFPArithInstruction(const MachineInstr &MI); + + void emitVFPConversionInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreInstruction(const MachineInstr &MI); + + void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); + + void emitMiscInstruction(const MachineInstr &MI); + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); + unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) { + return getMachineOpValue(MI, MI.getOperand(OpIdx)); + } + + /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. + /// + unsigned getShiftOp(unsigned Imm) const ; + + /// Routines that handle operands which add machine relocations which are + /// fixed up by the relocation stage. + void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV = 0); + void emitExternalSymbolAddress(const char *ES, unsigned Reloc); + void emitConstPoolAddress(unsigned CPI, unsigned Reloc); + void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); + void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, + intptr_t JTBase = 0); + }; + template <class CodeEmitter> + char Emitter<CodeEmitter>::ID = 0; +} + +/// createARMCodeEmitterPass - Return a pass that emits the collected ARM code +/// to the specified MCE object. + +namespace llvm { + +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE) { + return new Emitter<MachineCodeEmitter>(TM, MCE); +} +FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, + JITCodeEmitter &JCE) { + return new Emitter<JITCodeEmitter>(TM, JCE); +} + +} // end namespace llvm + +template<class CodeEmitter> +bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { + assert((MF.getTarget().getRelocationModel() != Reloc::Default || + MF.getTarget().getRelocationModel() != Reloc::Static) && + "JIT relocation model must be set to static or default!"); + II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); + TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); + MCPEs = &MF.getConstantPool()->getConstants(); + MJTEs = &MF.getJumpTableInfo()->getJumpTables(); + IsPIC = TM.getRelocationModel() == Reloc::PIC_; + JTI->Initialize(MF, IsPIC); + + do { + DOUT << "JITTing function '" << MF.getFunction()->getName() << "'\n"; + MCE.startFunction(MF); + for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); + MBB != E; ++MBB) { + MCE.StartMachineBasicBlock(MBB); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) + emitInstruction(*I); + } + } while (MCE.finishFunction(MF)); + + return false; +} + +/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. 
+/// +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getShiftOp(unsigned Imm) const { + switch (ARM_AM::getAM2ShiftOpc(Imm)) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return 2; + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::ror: + case ARM_AM::rrx: return 3; + } + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, + const MachineOperand &MO) { + if (MO.isReg()) + return ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + else if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + else if (MO.isGlobal()) + emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true); + else if (MO.isSymbol()) + emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); + else if (MO.isCPI()) { + const TargetInstrDesc &TID = MI.getDesc(); + // For VFP load, the immediate offset is multiplied by 4. + unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) + ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; + emitConstPoolAddress(MO.getIndex(), Reloc); + } else if (MO.isJTI()) + emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); + else if (MO.isMBB()) + emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); + else { + cerr << "ERROR: Unknown type of MachineOperand: " << MO << "\n"; + abort(); + } + return 0; +} + +/// emitGlobalAddress - Emit the specified address to the code stream. +/// +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + bool NeedStub, intptr_t ACPV) { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + GV, ACPV, NeedStub)); +} + +/// emitExternalSymbolAddress - Arrange for the address of an external symbol to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitExternalSymbolAddress(const char *ES, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), + Reloc, ES)); +} + +/// emitConstPoolAddress - Arrange for the address of an constant pool +/// to be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitConstPoolAddress(unsigned CPI, + unsigned Reloc) { + // Tell JIT emitter we'll resolve the address. + MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), + Reloc, CPI, 0, true)); +} + +/// emitJumpTableAddress - Arrange for the address of a jump table to +/// be emitted to the current location in the function, and allow it to be PC +/// relative. +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitJumpTableAddress(unsigned JTIndex, + unsigned Reloc) { + MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), + Reloc, JTIndex, 0, true)); +} + +/// emitMachineBasicBlock - Emit the specified address basic block. 
+template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMachineBasicBlock(MachineBasicBlock *BB, + unsigned Reloc, intptr_t JTBase) { + MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), + Reloc, BB, JTBase)); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitWordLE(unsigned Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << Binary << std::dec << "\n"; +#endif + MCE.emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitDWordLE(uint64_t Binary) { +#ifndef NDEBUG + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)Binary << std::dec << "\n"; + DOUT << " 0x" << std::hex << std::setw(8) << std::setfill('0') + << (unsigned)(Binary >> 32) << std::dec << "\n"; +#endif + MCE.emitDWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI) { + DOUT << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI; + + NumEmitted++; // Keep track of the # of mi's emitted + switch (MI.getDesc().TSFlags & ARMII::FormMask) { + default: { + assert(0 && "Unhandled instruction encoding format!"); + break; + } + case ARMII::Pseudo: + emitPseudoInstruction(MI); + break; + case ARMII::DPFrm: + case ARMII::DPSoRegFrm: + emitDataProcessingInstruction(MI); + break; + case ARMII::LdFrm: + case ARMII::StFrm: + emitLoadStoreInstruction(MI); + break; + case ARMII::LdMiscFrm: + case ARMII::StMiscFrm: + emitMiscLoadStoreInstruction(MI); + break; + case ARMII::LdStMulFrm: + emitLoadStoreMultipleInstruction(MI); + break; + case ARMII::MulFrm: + emitMulFrmInstruction(MI); + break; + case ARMII::ExtFrm: + emitExtendInstruction(MI); + break; + case ARMII::ArithMiscFrm: + emitMiscArithInstruction(MI); + break; + case ARMII::BrFrm: + emitBranchInstruction(MI); + break; + case ARMII::BrMiscFrm: + emitMiscBranchInstruction(MI); + break; + // VFP instructions. + case ARMII::VFPUnaryFrm: + case ARMII::VFPBinaryFrm: + emitVFPArithInstruction(MI); + break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + emitVFPConversionInstruction(MI); + break; + case ARMII::VFPLdStFrm: + emitVFPLoadStoreInstruction(MI); + break; + case ARMII::VFPLdStMulFrm: + emitVFPLoadStoreMultipleInstruction(MI); + break; + case ARMII::VFPMiscFrm: + emitMiscInstruction(MI); + break; + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { + unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. + unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. + const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; + + // Remember the CONSTPOOL_ENTRY address for later relocation. + JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); + + // Emit constpool island entry. In most cases, the actual values will be + // resolved and relocated after code emission. 
+ if (MCPE.isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = + static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); + + DOUT << " ** ARM constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'; + + GlobalValue *GV = ACPV->getGV(); + if (GV) { + assert(!ACPV->isStub() && "Don't know how to deal this yet!"); + if (ACPV->isNonLazyPointer()) + MCE.addRelocation(MachineRelocation::getIndirectSymbol( + MCE.getCurrentPCOffset(), ARM::reloc_arm_machine_cp_entry, GV, + (intptr_t)ACPV, false)); + else + emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, + ACPV->isStub() || isa<Function>(GV), (intptr_t)ACPV); + } else { + assert(!ACPV->isNonLazyPointer() && "Don't know how to deal this yet!"); + emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute); + } + emitWordLE(0); + } else { + Constant *CV = MCPE.Val.ConstVal; + +#ifndef NDEBUG + DOUT << " ** Constant pool #" << CPI << " @ " + << (void*)MCE.getCurrentPCValue() << " "; + if (const Function *F = dyn_cast<Function>(CV)) + DOUT << F->getName(); + else + DOUT << *CV; + DOUT << '\n'; +#endif + + if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV)); + emitWordLE(0); + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + uint32_t Val = *(uint32_t*)CI->getValue().getRawData(); + emitWordLE(Val); + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + if (CFP->getType() == Type::FloatTy) + emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else if (CFP->getType() == Type::DoubleTy) + emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } else { + assert(0 && "Unable to handle this constantpool entry!"); + abort(); + } + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMOVi2piecesInstruction(const MachineInstr &MI) { + const MachineOperand &MO0 = MI.getOperand(0); + const MachineOperand &MO1 = MI.getOperand(1); + assert(MO1.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); + + // Emit the 'mov' instruction. + unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op> + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V1)); + emitWordLE(Binary); + + // Now the 'orr' instruction. + Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 + + // Set the conditional execution predicate. + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode Rd. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op> + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(ARM_AM::getSOImmVal(V2)); + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLEApcrelJTInstruction(const MachineInstr &MI) { + // It's basically add r, pc, (LJTI - $+8) + + const TargetInstrDesc &TID = MI.getDesc(); + + // Emit the 'add' instruction. 
+ unsigned Binary = 0x4 << 21; // add: Insts{24-31} = 0b0100 + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode Rd. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode Rn which is PC. + Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + + // Encode the displacement. + // Set bit I(25) to identify this is the immediate form of <shifter_op>. + Binary |= 1 << ARMII::I_BitShift; + emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitPseudoMoveInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) + Binary |= 1 << ARMII::S_BitShift; + + // Encode register def if there is one. + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; + + // Encode the shift operation. + switch (Opcode) { + default: break; + case ARM::MOVrx: + // rrx + Binary |= 0x6 << 4; + break; + case ARM::MOVsrl_flag: + // lsr #1 + Binary |= (0x2 << 4) | (1 << 7); + break; + case ARM::MOVsra_flag: + // asr #1 + Binary |= (0x4 << 4) | (1 << 7); + break; + } + + // Encode register Rm. + Binary |= getMachineOpValue(MI, 1); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::addPCLabel(unsigned LabelID) { + DOUT << " ** LPC" << LabelID << " @ " + << (void*)MCE.getCurrentPCValue() << '\n'; + JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) { + unsigned Opcode = MI.getDesc().Opcode; + switch (Opcode) { + default: + abort(); // FIXME: + case TargetInstrInfo::INLINEASM: { + // We allow inline assembler nodes with empty bodies - they can + // implicitly define registers, which is ok for JIT. + if (MI.getOperand(0).getSymbolName()[0]) { + assert(0 && "JIT does not support inline asm!\n"); + abort(); + } + break; + } + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + MCE.emitLabel(MI.getOperand(0).getImm()); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case ARM::DWARF_LOC: + // Do nothing. + break; + case ARM::CONSTPOOL_ENTRY: + emitConstPoolInstruction(MI); + break; + case ARM::PICADD: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // PICADD is just an add instruction that implicitly read pc. + emitDataProcessingInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICSTR: + case ARM::PICSTRB: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. + emitLoadStoreInstruction(MI, 0, ARM::PC); + break; + } + case ARM::PICLDRH: + case ARM::PICLDRSH: + case ARM::PICLDRSB: + case ARM::PICSTRH: { + // Remember of the address of the PC label for relocation later. + addPCLabel(MI.getOperand(2).getImm()); + // These are just load / store instructions that implicitly read pc. 
+ emitMiscLoadStoreInstruction(MI, ARM::PC); + break; + } + case ARM::MOVi2pieces: + // Two instructions to materialize a constant. + emitMOVi2piecesInstruction(MI); + break; + case ARM::LEApcrelJT: + // Materialize jumptable address. + emitLEApcrelJTInstruction(MI); + break; + case ARM::MOVrx: + case ARM::MOVsrl_flag: + case ARM::MOVsra_flag: + emitPseudoMoveInstruction(MI); + break; + } +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineSoRegOpValue( + const MachineInstr &MI, + const TargetInstrDesc &TID, + const MachineOperand &MO, + unsigned OpIdx) { + unsigned Binary = getMachineOpValue(MI, MO); + + const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); + const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); + ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); + + // Encode the shift opcode. + unsigned SBits = 0; + unsigned Rs = MO1.getReg(); + if (Rs) { + // Set shift operand (bit[7:4]). + // LSL - 0001 + // LSR - 0011 + // ASR - 0101 + // ROR - 0111 + // RRX - 0110 and bit[11:8] clear. + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x1; break; + case ARM_AM::lsr: SBits = 0x3; break; + case ARM_AM::asr: SBits = 0x5; break; + case ARM_AM::ror: SBits = 0x7; break; + case ARM_AM::rrx: SBits = 0x6; break; + } + } else { + // Set shift operand (bit[6:4]). + // LSL - 000 + // LSR - 010 + // ASR - 100 + // ROR - 110 + switch (SOpc) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::lsl: SBits = 0x0; break; + case ARM_AM::lsr: SBits = 0x2; break; + case ARM_AM::asr: SBits = 0x4; break; + case ARM_AM::ror: SBits = 0x6; break; + } + } + Binary |= SBits << 4; + if (SOpc == ARM_AM::rrx) + return Binary; + + // Encode the shift operation Rs or shift_imm (except rrx). + if (Rs) { + // Encode Rs bit[11:8]. + assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); + return Binary | + (ARMRegisterInfo::getRegisterNumbering(Rs) << ARMII::RegRsShift); + } + + // Encode shift_imm bit[11:7]. + return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getMachineSoImmOpValue(unsigned SoImm) { + // Encode rotate_imm. + unsigned Binary = (ARM_AM::getSOImmValRot(SoImm) >> 1) + << ARMII::SoRotImmShift; + + // Encode immed_8. + Binary |= ARM_AM::getSOImmValImm(SoImm); + return Binary; +} + +template<class CodeEmitter> +unsigned Emitter<CodeEmitter>::getAddrModeSBit(const MachineInstr &MI, + const TargetInstrDesc &TID) const { + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + const MachineOperand &MO = MI.getOperand(i-1); + if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) + return 1 << ARMII::S_BitShift; + } + return 0; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitDataProcessingInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // Encode register def if there is one. + unsigned NumDefs = TID.getNumDefs(); + unsigned OpIdx = 0; + if (NumDefs) + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + else if (ImplicitRd) + // Special handling for implicit use (e.g. PC). 
+ Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + + // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + // Encode first non-shifter register operand if there is one. + bool isUnary = TID.TSFlags & ARMII::UnaryDP; + if (!isUnary) { + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else { + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; + ++OpIdx; + } + } + + // Encode shifter operand. + const MachineOperand &MO = MI.getOperand(OpIdx); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { + // Encode SoReg. + emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx)); + return; + } + + if (MO.isReg()) { + // Encode register Rm. + emitWordLE(Binary | ARMRegisterInfo::getRegisterNumbering(MO.getReg())); + return; + } + + // Encode so_imm. + // Set bit I(25) to identify this is the immediate form of <shifter_op>. + Binary |= 1 << ARMII::I_BitShift; + Binary |= getMachineSoImmOpValue(MO.getImm()); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLoadStoreInstruction( + const MachineInstr &MI, + unsigned ImplicitRd, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + if (ImplicitRd) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd) + << ARMII::RegRdShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDR_PRE. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM2Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative). + Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + if (!MO2.getReg()) { // is immediate + if (ARM_AM::getAM2Offset(AM2Opc)) + // Set the value of offset_12 field + Binary |= ARM_AM::getAM2Offset(AM2Opc); + emitWordLE(Binary); + return; + } + + // Set bit I(25), because this is not in immediate enconding. + Binary |= 1 << ARMII::I_BitShift; + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + // Set bit[3:0] to the corresponding Rm register + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + + // If this instr is in scaled register offset/index instruction, set + // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. 
+ if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { + Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift + Binary |= ShImm << ARMII::ShiftShift; // shift_immed + } + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI, + unsigned ImplicitRn) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Operand 0 of a pre- and post-indexed store is the address base + // writeback. Skip it. + bool Skipped = false; + if (IsPrePost && Form == ARMII::StMiscFrm) { + ++OpIdx; + Skipped = true; + } + + // Set first operand + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + // Set second operand + if (ImplicitRn) + // Special handling for implicit use (e.g. PC). + Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRn) + << ARMII::RegRnShift); + else + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; + + // If this is a two-address operand, skip it. e.g. LDRH_POST. + if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + const MachineOperand &MO2 = MI.getOperand(OpIdx); + unsigned AM3Opc = (ImplicitRn == ARM::PC) + ? 0 : MI.getOperand(OpIdx+1).getImm(); + + // Set bit U(23) according to sign of immed value (positive or negative) + Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << + ARMII::U_BitShift); + + // If this instr is in register offset/index encoding, set bit[3:0] + // to the corresponding Rm register. + if (MO2.getReg()) { + Binary |= ARMRegisterInfo::getRegisterNumbering(MO2.getReg()); + emitWordLE(Binary); + return; + } + + // This instr is in immediate offset/index encoding, set bit 22 to 1. + Binary |= 1 << ARMII::AM3_I_BitShift; + if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { + // Set operands + Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH + Binary |= (ImmOffs & 0xF); // immedL + } + + emitWordLE(Binary); +} + +static unsigned getAddrModeUPBits(unsigned Mode) { + unsigned Binary = 0; + + // Set addressing mode by modifying bits U(23) and P(24) + // IA - Increment after - bit U = 1 and bit P = 0 + // IB - Increment before - bit U = 1 and bit P = 1 + // DA - Decrement after - bit U = 0 and bit P = 0 + // DB - Decrement before - bit U = 0 and bit P = 1 + switch (Mode) { + default: assert(0 && "Unknown addressing sub-mode!"); + case ARM_AM::da: break; + case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; + case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; + case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; + } + + return Binary; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitLoadStoreMultipleInstruction( + const MachineInstr &MI) { + // Part of binary is determined by TableGn. 
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set base address operand + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + const MachineOperand &MO = MI.getOperand(1); + Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); + + // Set bit W(21) + if (ARM_AM::getAM4WBFlag(MO.getImm())) + Binary |= 0x1 << ARMII::W_BitShift; + + // Set registers + for (unsigned i = 4, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(MO.getReg()); + assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && + RegNum < 16); + Binary |= 0x1 << RegNum; + } + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMulFrmInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Encode S bit if MI modifies CPSR. + Binary |= getAddrModeSBit(MI, TID); + + // 32x32->64bit operations have two destination registers. The number + // of register definitions will tell us if that's what we're dealing with. + unsigned OpIdx = 0; + if (TID.getNumDefs() == 2) + Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; + + // Encode Rm + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode Rs + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; + + // Many multiple instructions (e.g. MLA) have three src operands. Encode + // it as Rn (for multiply, that's in the same offset as RdLo. + if (TID.getNumOperands() > OpIdx && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitExtendInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO1 = MI.getOperand(OpIdx++); + const MachineOperand &MO2 = MI.getOperand(OpIdx); + if (MO2.isReg()) { + // Two register operand form. + // Encode Rn. + Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, MO2); + ++OpIdx; + } else { + Binary |= getMachineOpValue(MI, MO1); + } + + // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. + if (MI.getOperand(OpIdx).isImm() && + !TID.OpInfo[OpIdx].isPredicate() && + !TID.OpInfo[OpIdx].isOptionalDef()) + Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscArithInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. 
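+ // Two operand shapes reach this routine (instruction choices are
+ // illustrative, assuming they use this format): a two-register form like
+ //   clz r0, r1                 -> only Rd and Rm are encoded,
+ // and a three-register form with a shift like
+ //   pkhbt r0, r1, r2, lsl #8   -> Rd, Rn, Rm and shift_imm are encoded.
+ // The early return below takes the first shape when the first source
+ // operand is followed only by the predicate (or by nothing at all).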
+ unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Rd + Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + + const MachineOperand &MO = MI.getOperand(OpIdx++); + if (OpIdx == TID.getNumOperands() || + TID.OpInfo[OpIdx].isPredicate() || + TID.OpInfo[OpIdx].isOptionalDef()) { + // Encode Rm and it's done. + Binary |= getMachineOpValue(MI, MO); + emitWordLE(Binary); + return; + } + + // Encode Rn. + Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; + + // Encode Rm. + Binary |= getMachineOpValue(MI, OpIdx++); + + // Encode shift_imm. + unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); + assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); + Binary |= ShiftAmt << ARMII::ShiftShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitBranchInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + if (TID.Opcode == ARM::TPsoft) + abort(); // FIXME + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set signed_immed_24 field + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitInlineJumpTable(unsigned JTIndex) { + // Remember the base address of the inline jump table. + uintptr_t JTBase = MCE.getCurrentPCValue(); + JTI->addJumpTableBaseAddr(JTIndex, JTBase); + DOUT << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase << '\n'; + + // Now emit the jump table entries. + const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs; + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + if (IsPIC) + // DestBB address - JT base. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); + else + // Absolute DestBB address. + emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); + emitWordLE(0); + } +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscBranchInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Handle jump tables. + if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) { + // First emit a ldr pc, [] instruction. + emitDataProcessingInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + unsigned JTIndex = (TID.Opcode == ARM::BR_JTr) + ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); + emitInlineJumpTable(JTIndex); + return; + } else if (TID.Opcode == ARM::BR_JTm) { + // First emit a ldr pc, [] instruction. + emitLoadStoreInstruction(MI, ARM::PC); + + // Then emit the inline jump table. + emitInlineJumpTable(MI.getOperand(3).getIndex()); + return; + } + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + if (TID.Opcode == ARM::BX_RET) + // The return register is LR. 
+ Binary |= ARMRegisterInfo::getRegisterNumbering(ARM::LR); + else + // otherwise, set the return register + Binary |= getMachineOpValue(MI, 0); + + emitWordLE(Binary); +} + +static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegD = ARMRegisterInfo::getRegisterNumbering(RegD, isSPVFP); + if (!isSPVFP) + Binary |= RegD << ARMII::RegRdShift; + else { + Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; + Binary |= (RegD & 0x01) << ARMII::D_BitShift; + } + return Binary; +} + +static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegN = ARMRegisterInfo::getRegisterNumbering(RegN, isSPVFP); + if (!isSPVFP) + Binary |= RegN << ARMII::RegRnShift; + else { + Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; + Binary |= (RegN & 0x01) << ARMII::N_BitShift; + } + return Binary; +} + +static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + bool isSPVFP = false; + RegM = ARMRegisterInfo::getRegisterNumbering(RegM, isSPVFP); + if (!isSPVFP) + Binary |= RegM; + else { + Binary |= ((RegM & 0x1E) >> 1); + Binary |= (RegM & 0x01) << ARMII::M_BitShift; + } + return Binary; +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPArithInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + assert((Binary & ARMII::D_BitShift) == 0 && + (Binary & ARMII::N_BitShift) == 0 && + (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // If this is a two-address operand, skip it, e.g. FMACD. + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + + // Encode Dn / Sn. + if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) + Binary |= encodeVFPRn(MI, OpIdx++); + + if (OpIdx == TID.getNumOperands() || + TID.OpInfo[OpIdx].isPredicate() || + TID.OpInfo[OpIdx].isOptionalDef()) { + // FCMPEZD etc. has only one operand. + emitWordLE(Binary); + return; + } + + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, OpIdx); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPConversionInstruction( + const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Form = TID.TSFlags & ARMII::FormMask; + + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 0); + break; + case ARMII::VFPConv4Frm: + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 0); + break; + case ARMII::VFPConv5Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 0); + break; + } + + switch (Form) { + default: break; + case ARMII::VFPConv1Frm: + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 1); + break; + case ARMII::VFPConv2Frm: + case ARMII::VFPConv3Frm: + // Encode Dn / Sn. 
+ Binary |= encodeVFPRn(MI, 1); + break; + case ARMII::VFPConv4Frm: + case ARMII::VFPConv5Frm: + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, 1); + break; + } + + if (Form == ARMII::VFPConv5Frm) + // Encode Dn / Sn. + Binary |= encodeVFPRn(MI, 2); + else if (Form == ARMII::VFPConv3Frm) + // Encode Dm / Sm. + Binary |= encodeVFPRm(MI, 2); + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPLoadStoreInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + unsigned OpIdx = 0; + + // Encode Dd / Sd. + Binary |= encodeVFPRd(MI, OpIdx++); + + // Encode address base. + const MachineOperand &Base = MI.getOperand(OpIdx++); + Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; + + // If there is a non-zero immediate offset, encode it. + if (Base.isReg()) { + const MachineOperand &Offset = MI.getOperand(OpIdx); + if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { + if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) + Binary |= 1 << ARMII::U_BitShift; + Binary |= ImmOffs; + emitWordLE(Binary); + return; + } + } + + // If immediate offset is omitted, default to +0. + Binary |= 1 << ARMII::U_BitShift; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitVFPLoadStoreMultipleInstruction( + const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + // Set base address operand + Binary |= getMachineOpValue(MI, 0) << ARMII::RegRnShift; + + // Set addressing mode by modifying bits U(23) and P(24) + const MachineOperand &MO = MI.getOperand(1); + Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); + + // Set bit W(21) + if (ARM_AM::getAM5WBFlag(MO.getImm())) + Binary |= 0x1 << ARMII::W_BitShift; + + // First register is encoded in Dd. + Binary |= encodeVFPRd(MI, 4); + + // Number of registers are encoded in offset field. + unsigned NumRegs = 1; + for (unsigned i = 5, e = MI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + break; + ++NumRegs; + } + Binary |= NumRegs * 2; + + emitWordLE(Binary); +} + +template<class CodeEmitter> +void Emitter<CodeEmitter>::emitMiscInstruction(const MachineInstr &MI) { + // Part of binary is determined by TableGn. + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= II->getPredicate(&MI) << ARMII::CondShift; + + emitWordLE(Binary); +} + +#include "ARMGenCodeEmitter.inc" + diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp new file mode 100644 index 0000000..db723fe --- /dev/null +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -0,0 +1,1285 @@ +//===-- ARMConstantIslandPass.cpp - ARM constant islands --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that splits the constant pool up into 'islands' +// which are scattered through-out the function. 
This is required due to the +// limited pc-relative displacements that ARM has. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-cp-islands" +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMInstrInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumCPEs, "Number of constpool entries"); +STATISTIC(NumSplit, "Number of uncond branches inserted"); +STATISTIC(NumCBrFixed, "Number of cond branches fixed"); +STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); + +namespace { + /// ARMConstantIslands - Due to limited PC-relative displacements, ARM + /// requires constant pool entries to be scattered among the instructions + /// inside a function. To do this, it completely ignores the normal LLVM + /// constant pool; instead, it places constants wherever it feels like with + /// special instructions. + /// + /// The terminology used in this pass includes: + /// Islands - Clumps of constants placed in the function. + /// Water - Potential places where an island could be formed. + /// CPE - A constant pool entry that has been placed somewhere, which + /// tracks a list of users. + class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass { + /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed + /// by MBB Number. The two-byte pads required for Thumb alignment are + /// counted as part of the following block (i.e., the offset and size for + /// a padded block will both be ==2 mod 4). + std::vector<unsigned> BBSizes; + + /// BBOffsets - the offset of each MBB in bytes, starting from 0. + /// The two-byte pads required for Thumb alignment are counted as part of + /// the following block. + std::vector<unsigned> BBOffsets; + + /// WaterList - A sorted list of basic blocks where islands could be placed + /// (i.e. blocks that don't fall through to the following block, due + /// to a return, unreachable, or unconditional branch). + std::vector<MachineBasicBlock*> WaterList; + + /// CPUser - One user of a constant pool, keeping the machine instruction + /// pointer, the constant pool being referenced, and the max displacement + /// allowed from the instruction to the CP. + struct CPUser { + MachineInstr *MI; + MachineInstr *CPEMI; + unsigned MaxDisp; + CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp) + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp) {} + }; + + /// CPUsers - Keep track of all of the machine instructions that use various + /// constant pools and their max displacement. + std::vector<CPUser> CPUsers; + + /// CPEntry - One per constant pool entry, keeping the machine instruction + /// pointer, the constpool index, and the number of CPUser's which + /// reference this entry. + struct CPEntry { + MachineInstr *CPEMI; + unsigned CPI; + unsigned RefCount; + CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0) + : CPEMI(cpemi), CPI(cpi), RefCount(rc) {} + }; + + /// CPEntries - Keep track of all of the constant pool entry machine + /// instructions. For each original constpool index (i.e. those that + /// existed upon entry to this pass), it keeps a vector of entries. 
+ /// Original elements are cloned as we go along; the clones are + /// put in the vector of the original element, but have distinct CPIs. + std::vector<std::vector<CPEntry> > CPEntries; + + /// ImmBranch - One per immediate branch, keeping the machine instruction + /// pointer, conditional or unconditional, the max displacement, + /// and (if isCond is true) the corresponding unconditional branch + /// opcode. + struct ImmBranch { + MachineInstr *MI; + unsigned MaxDisp : 31; + bool isCond : 1; + int UncondBr; + ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr) + : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {} + }; + + /// ImmBranches - Keep track of all the immediate branch instructions. + /// + std::vector<ImmBranch> ImmBranches; + + /// PushPopMIs - Keep track of all the Thumb push / pop instructions. + /// + SmallVector<MachineInstr*, 4> PushPopMIs; + + /// HasFarJump - True if any far jump instruction has been emitted during + /// the branch fix up pass. + bool HasFarJump; + + const TargetInstrInfo *TII; + ARMFunctionInfo *AFI; + bool isThumb; + public: + static char ID; + ARMConstantIslands() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM constant island placement and branch shortening pass"; + } + + private: + void DoInitialPlacement(MachineFunction &Fn, + std::vector<MachineInstr*> &CPEMIs); + CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + void InitialFunctionScan(MachineFunction &Fn, + const std::vector<MachineInstr*> &CPEMIs); + MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); + void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB); + void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); + bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); + int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); + bool LookForWater(CPUser&U, unsigned UserOffset, + MachineBasicBlock** NewMBB); + MachineBasicBlock* AcceptWater(MachineBasicBlock *WaterBB, + std::vector<MachineBasicBlock*>::iterator IP); + void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, + MachineBasicBlock** NewMBB); + bool HandleConstantPoolUser(MachineFunction &Fn, unsigned CPUserIndex); + void RemoveDeadCPEMI(MachineInstr *CPEMI); + bool RemoveUnusedCPEntries(); + bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, unsigned Disp, + bool DoDump); + bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water, + CPUser &U); + bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset, + unsigned Disp, bool NegativeOK); + bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp); + bool FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br); + bool FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br); + bool FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br); + bool UndoLRSpillRestore(); + + unsigned GetOffsetOf(MachineInstr *MI) const; + void dumpBBs(); + void verify(MachineFunction &Fn); + }; + char ARMConstantIslands::ID = 0; +} + +/// verify - check BBOffsets, BBSizes, alignment of islands +void ARMConstantIslands::verify(MachineFunction &Fn) { + assert(BBOffsets.size() == BBSizes.size()); + for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i) + assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]); + if (isThumb) { + for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty() && + 
MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) + assert((BBOffsets[MBB->getNumber()]%4 == 0 && + BBSizes[MBB->getNumber()]%4 == 0) || + (BBOffsets[MBB->getNumber()]%4 != 0 && + BBSizes[MBB->getNumber()]%4 != 0)); + } + } +} + +/// print block size and offset information - debugging +void ARMConstantIslands::dumpBBs() { + for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) { + DOUT << "block " << J << " offset " << BBOffsets[J] << + " size " << BBSizes[J] << "\n"; + } +} + +/// createARMConstantIslandPass - returns an instance of the constpool +/// island pass. +FunctionPass *llvm::createARMConstantIslandPass() { + return new ARMConstantIslands(); +} + +bool ARMConstantIslands::runOnMachineFunction(MachineFunction &Fn) { + MachineConstantPool &MCP = *Fn.getConstantPool(); + + TII = Fn.getTarget().getInstrInfo(); + AFI = Fn.getInfo<ARMFunctionInfo>(); + isThumb = AFI->isThumbFunction(); + + HasFarJump = false; + + // Renumber all of the machine basic blocks in the function, guaranteeing that + // the numbers agree with the position of the block in the function. + Fn.RenumberBlocks(); + + /// Thumb functions containing constant pools get 2-byte alignment. + /// This is so we can keep exact track of where the alignment padding goes. + /// Set default. + AFI->setAlign(isThumb ? 1U : 2U); + + // Perform the initial placement of the constant pool entries. To start with, + // we put them all at the end of the function. + std::vector<MachineInstr*> CPEMIs; + if (!MCP.isEmpty()) { + DoInitialPlacement(Fn, CPEMIs); + if (isThumb) + AFI->setAlign(2U); + } + + /// The next UID to take is the first unused one. + AFI->initConstPoolEntryUId(CPEMIs.size()); + + // Do the initial scan of the function, building up information about the + // sizes of each block, the location of all the water, and finding all of the + // constant pool users. + InitialFunctionScan(Fn, CPEMIs); + CPEMIs.clear(); + + /// Remove dead constant pool entries. + RemoveUnusedCPEntries(); + + // Iteratively place constant pool entries and fix up branches until there + // is no change. + bool MadeChange = false; + while (true) { + bool Change = false; + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) + Change |= HandleConstantPoolUser(Fn, i); + DEBUG(dumpBBs()); + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) + Change |= FixUpImmediateBr(Fn, ImmBranches[i]); + DEBUG(dumpBBs()); + if (!Change) + break; + MadeChange = true; + } + + // After a while, this might be made debug-only, but it is not expensive. + verify(Fn); + + // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. + // Undo the spill / restore of LR if possible. + if (!HasFarJump && AFI->isLRSpilledForFarJump() && isThumb) + MadeChange |= UndoLRSpillRestore(); + + BBSizes.clear(); + BBOffsets.clear(); + WaterList.clear(); + CPUsers.clear(); + CPEntries.clear(); + ImmBranches.clear(); + PushPopMIs.clear(); + + return MadeChange; +} + +/// DoInitialPlacement - Perform the initial placement of the constant pool +/// entries. To start with, we put them all at the end of the function. +void ARMConstantIslands::DoInitialPlacement(MachineFunction &Fn, + std::vector<MachineInstr*> &CPEMIs) { + // Create the basic block to hold the CPE's. + MachineBasicBlock *BB = Fn.CreateMachineBasicBlock(); + Fn.push_back(BB); + + // Add all of the constants from the constant pool to the end block, use an + // identity mapping of CPI's to CPE's. 
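+ // Illustration of the initial layout (written informally): with three
+ // constants, the block just created at the end of the function holds
+ //   CONSTPOOL_ENTRY 0, cp#0, <size0>
+ //   CONSTPOOL_ENTRY 1, cp#1, <size1>
+ //   CONSTPOOL_ENTRY 2, cp#2, <size2>
+ // i.e. entry id == original constpool index. Clones created later by the
+ // placement loop keep the original index but are given fresh ids.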
+ const std::vector<MachineConstantPoolEntry> &CPs = + Fn.getConstantPool()->getConstants(); + + const TargetData &TD = *Fn.getTarget().getTargetData(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); + // Verify that all constant pool entries are a multiple of 4 bytes. If not, + // we would have to pad them out or something so that instructions stay + // aligned. + assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!"); + MachineInstr *CPEMI = + BuildMI(BB, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) + .addImm(i).addConstantPoolIndex(i).addImm(Size); + CPEMIs.push_back(CPEMI); + + // Add a new CPEntry, but no corresponding CPUser yet. + std::vector<CPEntry> CPEs; + CPEs.push_back(CPEntry(CPEMI, i)); + CPEntries.push_back(CPEs); + NumCPEs++; + DOUT << "Moved CPI#" << i << " to end of function as #" << i << "\n"; + } +} + +/// BBHasFallthrough - Return true if the specified basic block can fallthrough +/// into the block immediately after it. +static bool BBHasFallthrough(MachineBasicBlock *MBB) { + // Get the next machine basic block in the function. + MachineFunction::iterator MBBI = MBB; + if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + return false; + + MachineBasicBlock *NextBB = next(MBBI); + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I == NextBB) + return true; + + return false; +} + +/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, +/// look up the corresponding CPEntry. +ARMConstantIslands::CPEntry +*ARMConstantIslands::findConstPoolEntry(unsigned CPI, + const MachineInstr *CPEMI) { + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + // Number of entries per constpool index should be small, just do a + // linear search. + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + if (CPEs[i].CPEMI == CPEMI) + return &CPEs[i]; + } + return NULL; +} + +/// InitialFunctionScan - Do the initial scan of the function, building up +/// information about the sizes of each block, the location of all the water, +/// and finding all of the constant pool users. +void ARMConstantIslands::InitialFunctionScan(MachineFunction &Fn, + const std::vector<MachineInstr*> &CPEMIs) { + unsigned Offset = 0; + for (MachineFunction::iterator MBBI = Fn.begin(), E = Fn.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + // If this block doesn't fall through into the next MBB, then this is + // 'water' that a constant pool island could be placed. + if (!BBHasFallthrough(&MBB)) + WaterList.push_back(&MBB); + + unsigned MBBSize = 0; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + // Add instruction size to MBBSize. + MBBSize += TII->GetInstSizeInBytes(I); + + int Opc = I->getOpcode(); + if (I->getDesc().isBranch()) { + bool isCond = false; + unsigned Bits = 0; + unsigned Scale = 1; + int UOpc = Opc; + switch (Opc) { + case ARM::tBR_JTr: + // A Thumb table jump may involve padding; for the offsets to + // be right, functions containing these must be 4-byte aligned. 
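+ // Roughly: if this block would otherwise end at an offset that is
+ // 2 mod 4, the 2 bytes of padding counted into MBBSize below keep
+ // the following offsets word aligned.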
+ AFI->setAlign(2U); + if ((Offset+MBBSize)%4 != 0) + MBBSize += 2; // padding + continue; // Does not get an entry in ImmBranches + default: + continue; // Ignore other JT branches + case ARM::Bcc: + isCond = true; + UOpc = ARM::B; + // Fallthrough + case ARM::B: + Bits = 24; + Scale = 4; + break; + case ARM::tBcc: + isCond = true; + UOpc = ARM::tB; + Bits = 8; + Scale = 2; + break; + case ARM::tB: + Bits = 11; + Scale = 2; + break; + } + + // Record this immediate branch. + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc)); + } + + if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET) + PushPopMIs.push_back(I); + + // Scan the instructions for constant pool operands. + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) + if (I->getOperand(op).isCPI()) { + // We found one. The addressing mode tells us the max displacement + // from the PC that this instruction permits. + + // Basic size info comes from the TSFlags field. + unsigned Bits = 0; + unsigned Scale = 1; + unsigned TSFlags = I->getDesc().TSFlags; + switch (TSFlags & ARMII::AddrModeMask) { + default: + // Constant pool entries can reach anything. + if (I->getOpcode() == ARM::CONSTPOOL_ENTRY) + continue; + if (I->getOpcode() == ARM::tLEApcrel) { + Bits = 8; // Taking the address of a CP entry. + break; + } + assert(0 && "Unknown addressing mode for CP reference!"); + case ARMII::AddrMode1: // AM1: 8 bits << 2 + Bits = 8; + Scale = 4; // Taking the address of a CP entry. + break; + case ARMII::AddrMode2: + Bits = 12; // +-offset_12 + break; + case ARMII::AddrMode3: + Bits = 8; // +-offset_8 + break; + // addrmode4 has no immediate offset. + case ARMII::AddrMode5: + Bits = 8; + Scale = 4; // +-(offset_8*4) + break; + case ARMII::AddrModeT1: + Bits = 5; // +offset_5 + break; + case ARMII::AddrModeT2: + Bits = 5; + Scale = 2; // +(offset_5*2) + break; + case ARMII::AddrModeT4: + Bits = 5; + Scale = 4; // +(offset_5*4) + break; + case ARMII::AddrModeTs: + Bits = 8; + Scale = 4; // +(offset_8*4) + break; + } + + // Remember that this is a user of a CP entry. + unsigned CPI = I->getOperand(op).getIndex(); + MachineInstr *CPEMI = CPEMIs[CPI]; + unsigned MaxOffs = ((1 << Bits)-1) * Scale; + CPUsers.push_back(CPUser(I, CPEMI, MaxOffs)); + + // Increment corresponding CPEntry reference count. + CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Cannot find a corresponding CPEntry!"); + CPE->RefCount++; + + // Instructions can only use one CP entry, don't bother scanning the + // rest of the operands. + break; + } + } + + // In thumb mode, if this block is a constpool island, we may need padding + // so it's aligned on 4 byte boundary. + if (isThumb && + !MBB.empty() && + MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && + (Offset%4) != 0) + MBBSize += 2; + + BBSizes.push_back(MBBSize); + BBOffsets.push_back(Offset); + Offset += MBBSize; + } +} + +/// GetOffsetOf - Return the current offset of the specified machine instruction +/// from the start of the function. This offset changes as stuff is moved +/// around inside the function. +unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { + MachineBasicBlock *MBB = MI->getParent(); + + // The offset is composed of two things: the sum of the sizes of all MBB's + // before this instruction's block, and the offset from the start of the block + // it is in. 
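+ // Concretely (numbers illustrative): if MI's block starts at
+ // BBOffsets[n] = 0x40 and the instructions before MI in that block total
+ // 12 bytes, the result is 0x4C. The Thumb CONSTPOOL_ENTRY case below
+ // additionally counts the 2 bytes of alignment padding that belong to the
+ // start of such a block.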
+ unsigned Offset = BBOffsets[MBB->getNumber()]; + + // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has + // alignment padding, and compensate if so. + if (isThumb && + MI->getOpcode() == ARM::CONSTPOOL_ENTRY && + Offset%4 != 0) + Offset += 2; + + // Sum instructions before MI in MBB. + for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) { + assert(I != MBB->end() && "Didn't find MI in its own basic block?"); + if (&*I == MI) return Offset; + Offset += TII->GetInstSizeInBytes(I); + } +} + +/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB +/// ID. +static bool CompareMBBNumbers(const MachineBasicBlock *LHS, + const MachineBasicBlock *RHS) { + return LHS->getNumber() < RHS->getNumber(); +} + +/// UpdateForInsertedWaterBlock - When a block is newly inserted into the +/// machine function, it upsets all of the block numbers. Renumber the blocks +/// and update the arrays that parallel this numbering. +void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { + // Renumber the MBB's to keep them consequtive. + NewBB->getParent()->RenumberBlocks(NewBB); + + // Insert a size into BBSizes to align it properly with the (newly + // renumbered) block numbers. + BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); + + // Likewise for BBOffsets. + BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + + // Next, update WaterList. Specifically, we need to add NewMBB as having + // available water after it. + std::vector<MachineBasicBlock*>::iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), NewBB, + CompareMBBNumbers); + WaterList.insert(IP, NewBB); +} + + +/// Split the basic block containing MI into two blocks, which are joined by +/// an unconditional branch. Update datastructures and renumber blocks to +/// account for this change and returns the newly created block. +MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { + MachineBasicBlock *OrigBB = MI->getParent(); + MachineFunction &MF = *OrigBB->getParent(); + + // Create a new MBB for the code after the OrigBB. + MachineBasicBlock *NewBB = + MF.CreateMachineBasicBlock(OrigBB->getBasicBlock()); + MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MF.insert(MBBI, NewBB); + + // Splice the instructions starting with MI over to NewBB. + NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); + + // Add an unconditional branch from OrigBB to NewBB. + // Note the new unconditional branch is not being recorded. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond to anything in the source. + BuildMI(OrigBB, DebugLoc::getUnknownLoc(), + TII->get(isThumb ? ARM::tB : ARM::B)).addMBB(NewBB); + NumSplit++; + + // Update the CFG. All succs of OrigBB are now succs of NewBB. + while (!OrigBB->succ_empty()) { + MachineBasicBlock *Succ = *OrigBB->succ_begin(); + OrigBB->removeSuccessor(Succ); + NewBB->addSuccessor(Succ); + + // This pass should be run after register allocation, so there should be no + // PHI nodes to update. + assert((Succ->empty() || Succ->begin()->getOpcode() != TargetInstrInfo::PHI) + && "PHI nodes should be eliminated by now!"); + } + + // OrigBB branches to NewBB. + OrigBB->addSuccessor(NewBB); + + // Update internal data structures to account for the newly inserted MBB. + // This is almost the same as UpdateForInsertedWaterBlock, except that + // the Water goes after OrigBB, not NewBB. 
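+ // Picture of the split (sketch):
+ //   before:  OrigBB: [ ..., MI, rest ]
+ //   after:   OrigBB: [ ..., B NewBB ]   <- new water lives after this
+ //            NewBB:  [ MI, rest ]
+ // which is why OrigBB rather than NewBB is added to WaterList below,
+ // unless OrigBB was already water, in which case NewBB is added instead.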
+ MF.RenumberBlocks(NewBB); + + // Insert a size into BBSizes to align it properly with the (newly + // renumbered) block numbers. + BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0); + + // Likewise for BBOffsets. + BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0); + + // Next, update WaterList. Specifically, we need to add OrigMBB as having + // available water after it (but not if it's already there, which happens + // when splitting before a conditional branch that is followed by an + // unconditional branch - in that case we want to insert NewBB). + std::vector<MachineBasicBlock*>::iterator IP = + std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB, + CompareMBBNumbers); + MachineBasicBlock* WaterBB = *IP; + if (WaterBB == OrigBB) + WaterList.insert(next(IP), NewBB); + else + WaterList.insert(IP, OrigBB); + + // Figure out how large the first NewMBB is. (It cannot + // contain a constpool_entry or tablejump.) + unsigned NewBBSize = 0; + for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end(); + I != E; ++I) + NewBBSize += TII->GetInstSizeInBytes(I); + + unsigned OrigBBI = OrigBB->getNumber(); + unsigned NewBBI = NewBB->getNumber(); + // Set the size of NewBB in BBSizes. + BBSizes[NewBBI] = NewBBSize; + + // We removed instructions from UserMBB, subtract that off from its size. + // Add 2 or 4 to the block to count the unconditional branch we added to it. + unsigned delta = isThumb ? 2 : 4; + BBSizes[OrigBBI] -= NewBBSize - delta; + + // ...and adjust BBOffsets for NewBB accordingly. + BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI]; + + // All BBOffsets following these blocks must be modified. + AdjustBBOffsetsAfter(NewBB, delta); + + return NewBB; +} + +/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool +/// reference) is within MaxDisp of TrialOffset (a proposed location of a +/// constant pool entry). +bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset, + unsigned TrialOffset, unsigned MaxDisp, bool NegativeOK) { + // On Thumb offsets==2 mod 4 are rounded down by the hardware for + // purposes of the displacement computation; compensate for that here. + // Effectively, the valid range of displacements is 2 bytes smaller for such + // references. + if (isThumb && UserOffset%4 !=0) + UserOffset -= 2; + // CPEs will be rounded up to a multiple of 4. + if (isThumb && TrialOffset%4 != 0) + TrialOffset += 2; + + if (UserOffset <= TrialOffset) { + // User before the Trial. + if (TrialOffset-UserOffset <= MaxDisp) + return true; + } else if (NegativeOK) { + if (UserOffset-TrialOffset <= MaxDisp) + return true; + } + return false; +} + +/// WaterIsInRange - Returns true if a CPE placed after the specified +/// Water (a basic block) will be in range for the specific MI. + +bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset, + MachineBasicBlock* Water, CPUser &U) +{ + unsigned MaxDisp = U.MaxDisp; + MachineFunction::iterator I = next(MachineFunction::iterator(Water)); + unsigned CPEOffset = BBOffsets[Water->getNumber()] + + BBSizes[Water->getNumber()]; + + // If the CPE is to be inserted before the instruction, that will raise + // the offset of the instruction. (Currently applies only to ARM, so + // no alignment compensation attempted here.) 
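+ // Example (numbers illustrative): a user at offset 0x100 considering
+ // water that ends at 0x80 will itself be pushed forward once the entry is
+ // inserted there, so for an 8-byte entry the range check below uses a
+ // user offset of 0x108 rather than 0x100.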
+ if (CPEOffset < UserOffset) + UserOffset += U.CPEMI->getOperand(2).getImm(); + + return OffsetIsInRange (UserOffset, CPEOffset, MaxDisp, !isThumb); +} + +/// CPEIsInRange - Returns true if the distance between specific MI and +/// specific ConstPool entry instruction can fit in MI's displacement field. +bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, + MachineInstr *CPEMI, + unsigned MaxDisp, bool DoDump) { + unsigned CPEOffset = GetOffsetOf(CPEMI); + assert(CPEOffset%4 == 0 && "Misaligned CPE"); + + if (DoDump) { + DOUT << "User of CPE#" << CPEMI->getOperand(0).getImm() + << " max delta=" << MaxDisp + << " insn address=" << UserOffset + << " CPE address=" << CPEOffset + << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI; + } + + return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, !isThumb); +} + +#ifndef NDEBUG +/// BBIsJumpedOver - Return true of the specified basic block's only predecessor +/// unconditionally branches to its only successor. +static bool BBIsJumpedOver(MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + MachineBasicBlock *Succ = *MBB->succ_begin(); + MachineBasicBlock *Pred = *MBB->pred_begin(); + MachineInstr *PredMI = &Pred->back(); + if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB) + return PredMI->getOperand(0).getMBB() == Succ; + return false; +} +#endif // NDEBUG + +void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, + int delta) { + MachineFunction::iterator MBBI = BB; MBBI = next(MBBI); + for(unsigned i=BB->getNumber()+1; i<BB->getParent()->getNumBlockIDs(); i++) { + BBOffsets[i] += delta; + // If some existing blocks have padding, adjust the padding as needed, a + // bit tricky. delta can be negative so don't use % on that. + if (isThumb) { + MachineBasicBlock *MBB = MBBI; + if (!MBB->empty()) { + // Constant pool entries require padding. + if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + unsigned oldOffset = BBOffsets[i] - delta; + if (oldOffset%4==0 && BBOffsets[i]%4!=0) { + // add new padding + BBSizes[i] += 2; + delta += 2; + } else if (oldOffset%4!=0 && BBOffsets[i]%4==0) { + // remove existing padding + BBSizes[i] -=2; + delta -= 2; + } + } + // Thumb jump tables require padding. They should be at the end; + // following unconditional branches are removed by AnalyzeBranch. + MachineInstr *ThumbJTMI = NULL; + if (prior(MBB->end())->getOpcode() == ARM::tBR_JTr) + ThumbJTMI = prior(MBB->end()); + if (ThumbJTMI) { + unsigned newMIOffset = GetOffsetOf(ThumbJTMI); + unsigned oldMIOffset = newMIOffset - delta; + if (oldMIOffset%4 == 0 && newMIOffset%4 != 0) { + // remove existing padding + BBSizes[i] -= 2; + delta -= 2; + } else if (oldMIOffset%4 != 0 && newMIOffset%4 == 0) { + // add new padding + BBSizes[i] += 2; + delta += 2; + } + } + if (delta==0) + return; + } + MBBI = next(MBBI); + } + } +} + +/// DecrementOldEntry - find the constant pool entry with index CPI +/// and instruction CPEMI, and decrement its refcount. If the refcount +/// becomes 0 remove the entry and instruction. Returns true if we removed +/// the entry, false if we didn't. + +bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { + // Find the old entry. Eliminate it if it is no longer used. 
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI); + assert(CPE && "Unexpected!"); + if (--CPE->RefCount == 0) { + RemoveDeadCPEMI(CPEMI); + CPE->CPEMI = NULL; + NumCPEs--; + return true; + } + return false; +} + +/// LookForCPEntryInRange - see if the currently referenced CPE is in range; +/// if not, see if an in-range clone of the CPE is in range, and if so, +/// change the data structures so the user references the clone. Returns: +/// 0 = no existing entry found +/// 1 = entry found, and there were no code insertions or deletions +/// 2 = entry found, and there were code insertions or deletions +int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset) +{ + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + + // Check to see if the CPE is already in-range. + if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, true)) { + DOUT << "In range\n"; + return 1; + } + + // No. Look for previously created clones of the CPE that are in range. + unsigned CPI = CPEMI->getOperand(1).getIndex(); + std::vector<CPEntry> &CPEs = CPEntries[CPI]; + for (unsigned i = 0, e = CPEs.size(); i != e; ++i) { + // We already tried this one + if (CPEs[i].CPEMI == CPEMI) + continue; + // Removing CPEs can leave empty entries, skip + if (CPEs[i].CPEMI == NULL) + continue; + if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, false)) { + DOUT << "Replacing CPE#" << CPI << " with CPE#" << CPEs[i].CPI << "\n"; + // Point the CPUser node to the replacement + U.CPEMI = CPEs[i].CPEMI; + // Change the CPI in the instruction operand to refer to the clone. + for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j) + if (UserMI->getOperand(j).isCPI()) { + UserMI->getOperand(j).setIndex(CPEs[i].CPI); + break; + } + // Adjust the refcount of the clone... + CPEs[i].RefCount++; + // ...and the original. If we didn't remove the old entry, none of the + // addresses changed, so we don't need another pass. + return DecrementOldEntry(CPI, CPEMI) ? 2 : 1; + } + } + return 0; +} + +/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in +/// the specific unconditional branch instruction. +static inline unsigned getUnconditionalBrDisp(int Opc) { + return (Opc == ARM::tB) ? ((1<<10)-1)*2 : ((1<<23)-1)*4; +} + +/// AcceptWater - Small amount of common code factored out of the following. + +MachineBasicBlock* ARMConstantIslands::AcceptWater(MachineBasicBlock *WaterBB, + std::vector<MachineBasicBlock*>::iterator IP) { + DOUT << "found water in range\n"; + // Remove the original WaterList entry; we want subsequent + // insertions in this vicinity to go after the one we're + // about to insert. This considerably reduces the number + // of times we have to move the same CPE more than once. + WaterList.erase(IP); + // CPE goes before following block (NewMBB). + return next(MachineFunction::iterator(WaterBB)); +} + +/// LookForWater - look for an existing entry in the WaterList in which +/// we can place the CPE referenced from U so it's within range of U's MI. +/// Returns true if found, false if not. If it returns true, *NewMBB +/// is set to the WaterList entry. +/// For ARM, we prefer the water that's farthest away. For Thumb, prefer +/// water that will not introduce padding to water that will; within each +/// group, prefer the water that's farthest away. 
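+/// For example (block numbers illustrative): if the water after BB#2, BB#5
+/// and BB#7 is all in range, ARM simply takes BB#7 (the scan below walks
+/// WaterList from the back); Thumb also takes BB#7 unless an island there
+/// would need 2 bytes of alignment padding, in which case it prefers the
+/// farthest padding-free candidate and only pads if every candidate would.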
+ +bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, + MachineBasicBlock** NewMBB) { + std::vector<MachineBasicBlock*>::iterator IPThatWouldPad; + MachineBasicBlock* WaterBBThatWouldPad = NULL; + if (!WaterList.empty()) { + for (std::vector<MachineBasicBlock*>::iterator IP = prior(WaterList.end()), + B = WaterList.begin();; --IP) { + MachineBasicBlock* WaterBB = *IP; + if (WaterIsInRange(UserOffset, WaterBB, U)) { + if (isThumb && + (BBOffsets[WaterBB->getNumber()] + + BBSizes[WaterBB->getNumber()])%4 != 0) { + // This is valid Water, but would introduce padding. Remember + // it in case we don't find any Water that doesn't do this. + if (!WaterBBThatWouldPad) { + WaterBBThatWouldPad = WaterBB; + IPThatWouldPad = IP; + } + } else { + *NewMBB = AcceptWater(WaterBB, IP); + return true; + } + } + if (IP == B) + break; + } + } + if (isThumb && WaterBBThatWouldPad) { + *NewMBB = AcceptWater(WaterBBThatWouldPad, IPThatWouldPad); + return true; + } + return false; +} + +/// CreateNewWater - No existing WaterList entry will work for +/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the +/// block is used if in range, and the conditional branch munged so control +/// flow is correct. Otherwise the block is split to create a hole with an +/// unconditional branch around it. In either case *NewMBB is set to a +/// block following which the new island can be inserted (the WaterList +/// is not adjusted). + +void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, + unsigned UserOffset, MachineBasicBlock** NewMBB) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + MachineBasicBlock *UserMBB = UserMI->getParent(); + unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] + + BBSizes[UserMBB->getNumber()]; + assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); + + // If the use is at the end of the block, or the end of the block + // is within range, make new water there. (The addition below is + // for the unconditional branch we will be adding: 4 bytes on ARM, + // 2 on Thumb. Possible Thumb alignment padding is allowed for + // inside OffsetIsInRange. + // If the block ends in an unconditional branch already, it is water, + // and is known to be out of range, so we'll always be adding a branch.) + if (&UserMBB->back() == UserMI || + OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb ? 2: 4), + U.MaxDisp, !isThumb)) { + DOUT << "Split at end of block\n"; + if (&UserMBB->back() == UserMI) + assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!"); + *NewMBB = next(MachineFunction::iterator(UserMBB)); + // Add an unconditional branch from UserMBB to fallthrough block. + // Record it for branch lengthening; this new branch will not get out of + // range, but if the preceding conditional branch is out of range, the + // targets will be exchanged, and the altered branch may be out of + // range, so the machinery has to know about it. + int UncondBr = isThumb ? ARM::tB : ARM::B; + BuildMI(UserMBB, DebugLoc::getUnknownLoc(), + TII->get(UncondBr)).addMBB(*NewMBB); + unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); + ImmBranches.push_back(ImmBranch(&UserMBB->back(), + MaxDisp, false, UncondBr)); + int delta = isThumb ? 2 : 4; + BBSizes[UserMBB->getNumber()] += delta; + AdjustBBOffsetsAfter(UserMBB, delta); + } else { + // What a big block. Find a place within the block to split it. 
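+ // As a rough numeric preview (values illustrative): an ARM load with a
+ // +-4095 byte range whose (pc-adjusted) offset is 0x1000 gets a first
+ // guess split point of 0x1000 + 4095 - 4 = 0x1FFB, which is then clamped
+ // below the start of the next block and backed off further as needed.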
+ // This is a little tricky on Thumb since instructions are 2 bytes + // and constant pool entries are 4 bytes: if instruction I references + // island CPE, and instruction I+1 references CPE', it will + // not work well to put CPE as far forward as possible, since then + // CPE' cannot immediately follow it (that location is 2 bytes + // farther away from I+1 than CPE was from I) and we'd need to create + // a new island. So, we make a first guess, then walk through the + // instructions between the one currently being looked at and the + // possible insertion point, and make sure any other instructions + // that reference CPEs will be able to use the same island area; + // if not, we back up the insertion point. + + // The 4 in the following is for the unconditional branch we'll be + // inserting (allows for long branch on Thumb). Alignment of the + // island is handled inside OffsetIsInRange. + unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4; + // This could point off the end of the block if we've already got + // constant pool entries following this block; only the last one is + // in the water list. Back past any possible branches (allow for a + // conditional and a maximally long unconditional). + if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1]) + BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] - + (isThumb ? 6 : 8); + unsigned EndInsertOffset = BaseInsertOffset + + CPEMI->getOperand(2).getImm(); + MachineBasicBlock::iterator MI = UserMI; + ++MI; + unsigned CPUIndex = CPUserIndex+1; + for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); + Offset < BaseInsertOffset; + Offset += TII->GetInstSizeInBytes(MI), + MI = next(MI)) { + if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) { + if (!OffsetIsInRange(Offset, EndInsertOffset, + CPUsers[CPUIndex].MaxDisp, !isThumb)) { + BaseInsertOffset -= (isThumb ? 2 : 4); + EndInsertOffset -= (isThumb ? 2 : 4); + } + // This is overly conservative, as we don't account for CPEMIs + // being reused within the block, but it doesn't matter much. + EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm(); + CPUIndex++; + } + } + DOUT << "Split in middle of big block\n"; + *NewMBB = SplitBlockBeforeInstr(prior(MI)); + } +} + +/// HandleConstantPoolUser - Analyze the specified user, checking to see if it +/// is out-of-range. If so, pick up the constant pool value and move it some +/// place in-range. Return true if we changed any addresses (thus must run +/// another pass of branch lengthening), false otherwise. +bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &Fn, + unsigned CPUserIndex) { + CPUser &U = CPUsers[CPUserIndex]; + MachineInstr *UserMI = U.MI; + MachineInstr *CPEMI = U.CPEMI; + unsigned CPI = CPEMI->getOperand(1).getIndex(); + unsigned Size = CPEMI->getOperand(2).getImm(); + MachineBasicBlock *NewMBB; + // Compute this only once, it's expensive. The 4 or 8 is the value the + // hardware keeps in the PC (2 insns ahead of the reference). + unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); + + // Special case: tLEApcrel are two instructions MI's. The actual user is the + // second instruction. + if (UserMI->getOpcode() == ARM::tLEApcrel) + UserOffset += 2; + + // See if the current entry is within range, or there is a clone of it + // in range. + int result = LookForExistingCPEntry(U, UserOffset); + if (result==1) return false; + else if (result==2) return true; + + // No existing clone of this CPE is within range. + // We will be generating a new clone. 
Get a UID for it. + unsigned ID = AFI->createConstPoolEntryUId(); + + // Look for water where we can place this CPE. We look for the farthest one + // away that will work. Forward references only for now (although later + // we might find some that are backwards). + + if (!LookForWater(U, UserOffset, &NewMBB)) { + // No water found. + DOUT << "No water found\n"; + CreateNewWater(CPUserIndex, UserOffset, &NewMBB); + } + + // Okay, we know we can put an island before NewMBB now, do it! + MachineBasicBlock *NewIsland = Fn.CreateMachineBasicBlock(); + Fn.insert(NewMBB, NewIsland); + + // Update internal data structures to account for the newly inserted MBB. + UpdateForInsertedWaterBlock(NewIsland); + + // Decrement the old entry, and remove it if refcount becomes 0. + DecrementOldEntry(CPI, CPEMI); + + // Now that we have an island to add the CPE to, clone the original CPE and + // add it to the island. + U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(), + TII->get(ARM::CONSTPOOL_ENTRY)) + .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); + CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); + NumCPEs++; + + BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; + // Compensate for .align 2 in thumb mode. + if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0) + Size += 2; + // Increase the size of the island block to account for the new entry. + BBSizes[NewIsland->getNumber()] += Size; + AdjustBBOffsetsAfter(NewIsland, Size); + + // Finally, change the CPI in the instruction operand to be ID. + for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) + if (UserMI->getOperand(i).isCPI()) { + UserMI->getOperand(i).setIndex(ID); + break; + } + + DOUT << " Moved CPE to #" << ID << " CPI=" << CPI << "\t" << *UserMI; + + return true; +} + +/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update +/// sizes and offsets of impacted basic blocks. +void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) { + MachineBasicBlock *CPEBB = CPEMI->getParent(); + unsigned Size = CPEMI->getOperand(2).getImm(); + CPEMI->eraseFromParent(); + BBSizes[CPEBB->getNumber()] -= Size; + // All succeeding offsets have the current size value added in, fix this. + if (CPEBB->empty()) { + // In thumb mode, the size of island may be padded by two to compensate for + // the alignment requirement. Then it will now be 2 when the block is + // empty, so fix this. + // All succeeding offsets have the current size value added in, fix this. + if (BBSizes[CPEBB->getNumber()] != 0) { + Size += BBSizes[CPEBB->getNumber()]; + BBSizes[CPEBB->getNumber()] = 0; + } + } + AdjustBBOffsetsAfter(CPEBB, -Size); + // An island has only one predecessor BB and one successor BB. Check if + // this BB's predecessor jumps directly to this BB's successor. This + // shouldn't happen currently. + assert(!BBIsJumpedOver(CPEBB) && "How did this happen?"); + // FIXME: remove the empty blocks after all the work is done? +} + +/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts +/// are zero. 
+bool ARMConstantIslands::RemoveUnusedCPEntries() { + unsigned MadeChange = false; + for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) { + std::vector<CPEntry> &CPEs = CPEntries[i]; + for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { + if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { + RemoveDeadCPEMI(CPEs[j].CPEMI); + CPEs[j].CPEMI = NULL; + MadeChange = true; + } + } + } + return MadeChange; +} + +/// BBIsInRange - Returns true if the distance between specific MI and +/// specific BB can fit in MI's displacement field. +bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB, + unsigned MaxDisp) { + unsigned PCAdj = isThumb ? 4 : 8; + unsigned BrOffset = GetOffsetOf(MI) + PCAdj; + unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + + DOUT << "Branch of destination BB#" << DestBB->getNumber() + << " from BB#" << MI->getParent()->getNumber() + << " max delta=" << MaxDisp + << " from " << GetOffsetOf(MI) << " to " << DestOffset + << " offset " << int(DestOffset-BrOffset) << "\t" << *MI; + + if (BrOffset <= DestOffset) { + // Branch before the Dest. + if (DestOffset-BrOffset <= MaxDisp) + return true; + } else { + if (BrOffset-DestOffset <= MaxDisp) + return true; + } + return false; +} + +/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far +/// away to fit in its displacement field. +bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + + // Check to see if the DestBB is already in-range. + if (BBIsInRange(MI, DestBB, Br.MaxDisp)) + return false; + + if (!Br.isCond) + return FixUpUnconditionalBr(Fn, Br); + return FixUpConditionalBr(Fn, Br); +} + +/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is +/// too far away to fit in its displacement field. If the LR register has been +/// spilled in the epilogue, then we can use BL to implement a far jump. +/// Otherwise, add an intermediate branch instruction to a branch. +bool +ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *MBB = MI->getParent(); + assert(isThumb && "Expected a Thumb function!"); + + // Use BL to implement far jump. + Br.MaxDisp = (1 << 21) * 2; + MI->setDesc(TII->get(ARM::tBfar)); + BBSizes[MBB->getNumber()] += 2; + AdjustBBOffsetsAfter(MBB, 2); + HasFarJump = true; + NumUBrFixed++; + + DOUT << " Changed B to long jump " << *MI; + + return true; +} + +/// FixUpConditionalBr - Fix up a conditional branch whose destination is too +/// far away to fit in its displacement field. It is converted to an inverse +/// conditional branch + an unconditional branch to the destination. +bool +ARMConstantIslands::FixUpConditionalBr(MachineFunction &Fn, ImmBranch &Br) { + MachineInstr *MI = Br.MI; + MachineBasicBlock *DestBB = MI->getOperand(0).getMBB(); + + // Add an unconditional branch to the destination and invert the branch + // condition to jump over it: + // blt L1 + // => + // bge L2 + // b L1 + // L2: + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm(); + CC = ARMCC::getOppositeCondition(CC); + unsigned CCReg = MI->getOperand(2).getReg(); + + // If the branch is at the end of its MBB and that has a fall-through block, + // direct the updated conditional branch to the fall-through block. Otherwise, + // split the MBB before the next instruction. 
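+ // Concretely (sketch): if the conditional branch is the last instruction
+ // and the block falls through, no split is needed - the inverted branch
+ // simply targets the fall-through block. A split is needed when something
+ // follows the branch (unless that is an unconditional branch whose
+ // destination can simply be swapped, handled first below) or when the
+ // block cannot fall through.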
+ MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *BMI = &MBB->back(); + bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); + + NumCBrFixed++; + if (BMI != MI) { + if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && + BMI->getOpcode() == Br.UncondBr) { + // Last MI in the BB is an unconditional branch. Can we simply invert the + // condition and swap destinations: + // beq L1 + // b L2 + // => + // bne L2 + // b L1 + MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); + if (BBIsInRange(MI, NewDest, Br.MaxDisp)) { + DOUT << " Invert Bcc condition and swap its destination with " << *BMI; + BMI->getOperand(0).setMBB(DestBB); + MI->getOperand(0).setMBB(NewDest); + MI->getOperand(1).setImm(CC); + return true; + } + } + } + + if (NeedSplit) { + SplitBlockBeforeInstr(MI); + // No need for the branch to the next block. We're adding an unconditional + // branch to the destination. + int delta = TII->GetInstSizeInBytes(&MBB->back()); + BBSizes[MBB->getNumber()] -= delta; + MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB)); + AdjustBBOffsetsAfter(SplitBB, -delta); + MBB->back().eraseFromParent(); + // BBOffsets[SplitBB] is wrong temporarily, fixed below + } + MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB)); + + DOUT << " Insert B to BB#" << DestBB->getNumber() + << " also invert condition and change dest. to BB#" + << NextBB->getNumber() << "\n"; + + // Insert a new conditional branch and a new unconditional branch. + // Also update the ImmBranch as well as adding a new entry for the new branch. + BuildMI(MBB, DebugLoc::getUnknownLoc(), + TII->get(MI->getOpcode())) + .addMBB(NextBB).addImm(CC).addReg(CCReg); + Br.MI = &MBB->back(); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BuildMI(MBB, DebugLoc::getUnknownLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); + ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); + + // Remove the old conditional branch. It may or may not still be in MBB. + BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); + + // The net size change is an addition of one unconditional branch. + int delta = TII->GetInstSizeInBytes(&MBB->back()); + AdjustBBOffsetsAfter(MBB, delta); + return true; +} + +/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills +/// LR / restores LR to pc. +bool ARMConstantIslands::UndoLRSpillRestore() { + bool MadeChange = false; + for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { + MachineInstr *MI = PushPopMIs[i]; + if (MI->getOpcode() == ARM::tPOP_RET && + MI->getOperand(0).getReg() == ARM::PC && + MI->getNumExplicitOperands() == 1) { + BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); + MI->eraseFromParent(); + MadeChange = true; + } + } + return MadeChange; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp new file mode 100644 index 0000000..3a038c9 --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -0,0 +1,100 @@ +//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. +// +//===----------------------------------------------------------------------===// + +#include "ARMConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/GlobalValue.h" +#include "llvm/Type.h" +#include "llvm/Support/Streams.h" +#include "llvm/Support/raw_ostream.h" +#include <ostream> +using namespace llvm; + +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, + ARMCP::ARMCPKind k, + unsigned char PCAdj, + const char *Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)gv->getType()), + GV(gv), S(NULL), LabelId(id), Kind(k), PCAdjust(PCAdj), + Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(const char *s, unsigned id, + ARMCP::ARMCPKind k, + unsigned char PCAdj, + const char *Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)Type::Int32Ty), + GV(NULL), S(s), LabelId(id), Kind(k), PCAdjust(PCAdj), + Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, + ARMCP::ARMCPKind k, + const char *Modif) + : MachineConstantPoolValue((const Type*)Type::Int32Ty), + GV(gv), S(NULL), LabelId(0), Kind(k), PCAdjust(0), + Modifier(Modif) {} + +int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (CPV->GV == GV && + CPV->S == S && + CPV->LabelId == LabelId && + CPV->Kind == Kind && + CPV->PCAdjust == PCAdjust) + return i; + } + } + + return -1; +} + +void +ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(GV); + ID.AddPointer(S); + ID.AddInteger(LabelId); + ID.AddInteger((unsigned)Kind); + ID.AddInteger(PCAdjust); +} + +void ARMConstantPoolValue::dump() const { + cerr << " " << *this; +} + +void ARMConstantPoolValue::print(std::ostream &O) const { + raw_os_ostream RawOS(O); + print(RawOS); +} + +void ARMConstantPoolValue::print(raw_ostream &O) const { + if (GV) + O << GV->getName(); + else + O << S; + if (isNonLazyPointer()) O << "$non_lazy_ptr"; + else if (isStub()) O << "$stub"; + if (Modifier) O << "(" << Modifier << ")"; + if (PCAdjust != 0) { + O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; + if (AddCurrentAddress) O << "-."; + O << ")"; + } +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h new file mode 100644 index 0000000..d2b9066 --- /dev/null +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -0,0 +1,92 @@ +//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include <iosfwd> + +namespace llvm { + +class GlobalValue; + +namespace ARMCP { + enum ARMCPKind { + CPValue, + CPNonLazyPtr, + CPStub + }; +} + +/// ARMConstantPoolValue - ARM specific constantpool value. This is used to +/// represent PC relative displacement between the address of the load +/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)). +class ARMConstantPoolValue : public MachineConstantPoolValue { + GlobalValue *GV; // GlobalValue being loaded. + const char *S; // ExtSymbol being loaded. + unsigned LabelId; // Label id of the load. + ARMCP::ARMCPKind Kind; // non_lazy_ptr or stub? + unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative. + // 8 for ARM, 4 for Thumb. + const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) + bool AddCurrentAddress; + +public: + ARMConstantPoolValue(GlobalValue *gv, unsigned id, + ARMCP::ARMCPKind Kind = ARMCP::CPValue, + unsigned char PCAdj = 0, const char *Modifier = NULL, + bool AddCurrentAddress = false); + ARMConstantPoolValue(const char *s, unsigned id, + ARMCP::ARMCPKind Kind = ARMCP::CPValue, + unsigned char PCAdj = 0, const char *Modifier = NULL, + bool AddCurrentAddress = false); + ARMConstantPoolValue(GlobalValue *GV, ARMCP::ARMCPKind Kind, + const char *Modifier); + + + GlobalValue *getGV() const { return GV; } + const char *getSymbol() const { return S; } + const char *getModifier() const { return Modifier; } + bool hasModifier() const { return Modifier != NULL; } + bool mustAddCurrentAddress() const { return AddCurrentAddress; } + unsigned getLabelId() const { return LabelId; } + bool isNonLazyPointer() const { return Kind == ARMCP::CPNonLazyPtr; } + bool isStub() const { return Kind == ARMCP::CPStub; } + unsigned char getPCAdjustment() const { return PCAdjust; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + + void print(std::ostream *O) const { if (O) print(*O); } + void print(std::ostream &O) const; + void print(raw_ostream *O) const { if (O) print(*O); } + void print(raw_ostream &O) const; + void dump() const; +}; + + inline std::ostream &operator<<(std::ostream &O, const ARMConstantPoolValue &V) { + V.print(O); + return O; +} + +inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { + V.print(O); + return O; +} + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMFrameInfo.h b/lib/Target/ARM/ARMFrameInfo.h new file mode 100644 index 0000000..405b8f2 --- /dev/null +++ b/lib/Target/ARM/ARMFrameInfo.h @@ -0,0 +1,32 @@ +//===-- ARMTargetFrameInfo.h - Define TargetFrameInfo for ARM ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_FRAMEINFO_H +#define ARM_FRAMEINFO_H + +#include "ARM.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "ARMSubtarget.h" + +namespace llvm { + +class ARMFrameInfo : public TargetFrameInfo { +public: + explicit ARMFrameInfo(const ARMSubtarget &ST) + : TargetFrameInfo(StackGrowsDown, ST.getStackAlignment(), 0) { + } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp new file mode 100644 index 0000000..ca3a9cb --- /dev/null +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -0,0 +1,911 @@ +//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the ARM target. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +//===--------------------------------------------------------------------===// +/// ARMDAGToDAGISel - ARM specific code to select ARM machine +/// instructions for SelectionDAG operations. +/// +namespace { +class ARMDAGToDAGISel : public SelectionDAGISel { + ARMTargetMachine &TM; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. 
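+  /// Select() below consults it (e.g. isThumb()) when choosing between ARM
+  /// and Thumb opcode forms.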
+ const ARMSubtarget *Subtarget; + +public: + explicit ARMDAGToDAGISel(ARMTargetMachine &tm) + : SelectionDAGISel(tm), TM(tm), + Subtarget(&TM.getSubtarget<ARMSubtarget>()) { + } + + virtual const char *getPassName() const { + return "ARM Instruction Selection"; + } + + SDNode *Select(SDValue Op); + virtual void InstructionSelect(); + bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + + bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, + SDValue &Label); + + bool SelectThumbAddrModeRR(SDValue Op, SDValue N, SDValue &Base, + SDValue &Offset); + bool SelectThumbAddrModeRI5(SDValue Op, SDValue N, unsigned Scale, + SDValue &Base, SDValue &OffImm, + SDValue &Offset); + bool SelectThumbAddrModeS1(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS2(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeS4(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm, SDValue &Offset); + bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base, + SDValue &OffImm); + + bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A, + SDValue &B, SDValue &C); + + // Include the pieces autogenerated from the target description. +#include "ARMGenDAGISel.inc" + +private: + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; +} + +void ARMDAGToDAGISel::InstructionSelect() { + DEBUG(BB->dump()); + + SelectRoot(*CurDAG); + CurDAG->RemoveDeadNodes(); +} + +bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL) { + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. + int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return true; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return true; + } + + // Match simple R +/- imm12 operands. + if (N.getOpcode() == ISD::ADD) + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 0x1000) || + (RHSC < 0 && RHSC > -0x1000)) { // 12 bits. 
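+        // e.g. (add X, 100) becomes [X, #+100] and (add X, -8) becomes
+        // [X, #-8]; offsets of +/-4096 or beyond fall through to the
+        // register +/- (possibly shifted) register forms below.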
+ Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, + ARM_AM::no_shift), + MVT::i32); + return true; + } + } + + // Otherwise this is R +/- [possibly shifted] R + ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(1).getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + // Try matching (R shl C) + (R). + if (N.getOpcode() == ISD::ADD && ShOpcVal == ARM_AM::no_shift) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getZExtValue(); + if (Val >= 0 && Val < 0x1000) { // 12 bits. + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, + ARM_AM::no_shift), + MVT::i32); + return true; + } + } + + Offset = N; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + unsigned ShAmt = 0; + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShAmt = Sh->getZExtValue(); + Offset = N.getOperand(0); + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + + +bool ARMDAGToDAGISel::SelectAddrMode3(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. 
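+    // A register-register subtract does still reach this point, e.g.
+    // (sub X, Y) becomes the [X, -Y] form with a zero immediate.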
+ Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC),MVT::i32); + return true; + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op.getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast<LoadSDNode>(Op)->getAddressingMode() + : cast<StoreSDNode>(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { + int Val = (int)C->getZExtValue(); + if (Val >= 0 && Val < 256) { + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32); + return true; + } + } + + Offset = N; + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32); + return true; +} + + +bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset) { + if (N.getOpcode() != ISD::ADD) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; + } + + // If the RHS is +/- imm8, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied by 4. + RHSC >>= 2; + if ((RHSC >= 0 && RHSC < 256) || + (RHSC < 0 && RHSC > -256)) { // note -256 itself isn't allowed. 
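+        // e.g. an FP access at (add X, 40) becomes [X, #+40], stored as an
+        // offset of 10 words; reachable offsets are multiples of 4 within
+        // +/- 1020 bytes.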
+ Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + MVT::i32); + return true; + } + } + } + + Base = N; + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModePC(SDValue Op, SDValue N, + SDValue &Offset, SDValue &Label) { + if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { + Offset = N.getOperand(0); + SDValue N1 = N.getOperand(1); + Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), + MVT::i32); + return true; + } + return false; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue Op, SDValue N, + SDValue &Base, SDValue &Offset){ + // FIXME dl should come from the parent load or store, not the address + DebugLoc dl = Op.getDebugLoc(); + if (N.getOpcode() != ISD::ADD) { + Base = N; + // We must materialize a zero in a reg! Returning a constant here + // wouldn't work without additional code to position the node within + // ISel's topological ordering in a place where ISel will process it + // normally. Instead, just explicitly issue a tMOVri8 node! + Offset = SDValue(CurDAG->getTargetNode(ARM::tMOVi8, dl, MVT::i32, + CurDAG->getTargetConstant(0, MVT::i32)), 0); + return true; + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, + unsigned Scale, SDValue &Base, + SDValue &OffImm, SDValue &Offset) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(Op, N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (N.getOpcode() != ISD::ADD) { + Base = (N.getOpcode() == ARMISD::Wrapper) ? N.getOperand(0) : N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // Thumb does not have [sp, r] address mode. + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // If the RHS is + imm5 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & (Scale-1)) == 0) { // The constant is implicitly multiplied. 
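+      // e.g. for a word access (Scale == 4) an offset of 60 encodes as
+      // imm5 = 15; offsets that are not a multiple of Scale, or that exceed
+      // 31 * Scale, fall back to the reg + reg form at the bottom of this
+      // function.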
+ RHSC /= Scale; + if (RHSC >= 0 && RHSC < 32) { + Base = N.getOperand(0); + Offset = CurDAG->getRegister(0, MVT::i32); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS1(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 1, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS2(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 2, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeS4(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm, + SDValue &Offset) { + return SelectThumbAddrModeRI5(Op, N, 4, Base, OffImm, Offset); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() != ISD::ADD) + return false; + + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0)); + if (N.getOperand(0).getOpcode() == ISD::FrameIndex || + (LHSR && LHSR->getReg() == ARM::SP)) { + // If the RHS is + imm8 * scale, fold into addr mode. + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if ((RHSC & 3) == 0) { // The constant is implicitly multiplied. + RHSC >>= 2; + if (RHSC >= 0 && RHSC < 256) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op, + SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +/// getAL - Returns a ARMCC::AL immediate node. +static inline SDValue getAL(SelectionDAG *CurDAG) { + return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); +} + + +SDNode *ARMDAGToDAGISel::Select(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + if (N->isMachineOpcode()) + return NULL; // Already selected. 
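+  // The cases in the switch below need ARM-specific selection; anything not
+  // matched here falls through to the TableGen-generated matcher via the
+  // SelectCode call at the end of this function.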
+ + switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); + bool UseCP = true; + if (Subtarget->isThumb()) + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + if (UseCP) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get(Type::Int32Ty, Val), + TLI.getPointerTy()); + + SDNode *ResNode; + if (Subtarget->isThumb()) + ResNode = CurDAG->getTargetNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, + CPIdx, CurDAG->getEntryNode()); + else { + SDValue Ops[] = { + CPIdx, + CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; + ResNode=CurDAG->getTargetNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops, 6); + } + ReplaceUses(Op, SDValue(ResNode, 0)); + return NULL; + } + + // Other cases are autogenerated. + break; + } + case ISD::FrameIndex: { + // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + if (Subtarget->isThumb()) { + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } else { + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDri, MVT::i32, Ops, 5); + } + } + case ISD::ADD: { + if (!Subtarget->isThumb()) + break; + // Select add sp, c to tADDhirr. + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(Op.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(Op.getOperand(1)); + if (LHSR && LHSR->getReg() == ARM::SP) { + std::swap(N0, N1); + std::swap(LHSR, RHSR); + } + if (RHSR && RHSR->getReg() == ARM::SP) { + SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVlor2hir, dl, + Op.getValueType(), N0, N0), 0); + return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); + } + break; + } + case ISD::MUL: + if (Subtarget->isThumb()) + break; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RHSV = C->getZExtValue(); + if (!RHSV) break; + if (isPowerOf2_32(RHSV-1)) { // 2^n+1? + SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV-1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + } + if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
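+        // e.g. x * 7: RHSV+1 == 8 is a power of two, so this emits
+        // rsb Rd, Rn, Rn, lsl #3 -- i.e. (Rn << 3) - Rn -- instead of a
+        // multiply.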
+ SDValue V = Op.getOperand(0); + unsigned ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, Log2_32(RHSV+1)); + SDValue Ops[] = { V, V, CurDAG->getRegister(0, MVT::i32), + CurDAG->getTargetConstant(ShImm, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + } + } + break; + case ARMISD::FMRRD: + return CurDAG->getTargetNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + Op.getOperand(0), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)); + case ISD::UMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::UMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::SMUL_LOHI: { + SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getTargetNode(ARM::SMULL, dl, MVT::i32, MVT::i32, Ops, 5); + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + MVT LoadedVT = LD->getMemoryVT(); + if (AM != ISD::UNINDEXED) { + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(Op, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getTargetNode(Opcode, dl, MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + } + // Other cases are autogenerated. + break; + } + case ARMISD::BRCOND: { + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? 
ARM::tBcc : ARM::Bcc; + SDValue Chain = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N1.getOpcode() == ISD::BasicBlock); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; + SDNode *ResNode = CurDAG->getTargetNode(Opc, dl, MVT::Other, + MVT::Flag, Ops, 5); + Chain = SDValue(ResNode, 0); + if (Op.getNode()->getNumValues() == 2) { + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(Op.getNode(), 1), InFlag); + } + ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo())); + return NULL; + } + case ARMISD::CMOV: { + bool isThumb = Subtarget->isThumb(); + MVT VT = Op.getValueType(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (!isThumb && VT == MVT::i32 && + SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 7); + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<<P:Predicate_so_imm>><<X:so_imm_XFORM>>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm_XFORM:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (VT == MVT::i32 && + N3.getOpcode() == ISD::Constant && + Predicate_so_imm(N3.getNode())) { + SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N1)->getZExtValue()), + MVT::i32); + Tmp1 = Transform_so_imm_XFORM(Tmp1.getNode()); + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCi, MVT::i32, Ops, 5); + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + // + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 11 size = 0 + // + // Also FCPYScc and FCPYDcc. + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::i32: + Opc = isThumb ? 
ARM::tMOVCCr : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::FCPYScc; + break; + case MVT::f64: + Opc = ARM::FCPYDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + case ARMISD::CNEG: { + MVT VT = Op.getValueType(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + SDValue N3 = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast<ConstantSDNode>(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT()) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::f32: + Opc = ARM::FNEGScc; + break; + case MVT::f64: + Opc = ARM::FNEGDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); + } + + case ISD::DECLARE: { + SDValue Chain = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2 = Op.getOperand(2); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N1); + // FIXME: handle VLAs. + if (!FINode) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + if (N2.getOpcode() == ARMISD::PIC_ADD && isa<LoadSDNode>(N2.getOperand(0))) + N2 = N2.getOperand(0); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(N2); + if (!Ld) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + SDValue BasePtr = Ld->getBasePtr(); + assert(BasePtr.getOpcode() == ARMISD::Wrapper && + isa<ConstantPoolSDNode>(BasePtr.getOperand(0)) && + "llvm.dbg.variable should be a constantpool node"); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(BasePtr.getOperand(0)); + GlobalValue *GV = 0; + if (CP->isMachineConstantPoolEntry()) { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)CP->getMachineCPVal(); + GV = ACPV->getGV(); + } else + GV = dyn_cast<GlobalValue>(CP->getConstVal()); + if (!GV) { + ReplaceUses(Op.getValue(0), Chain); + return NULL; + } + + SDValue Tmp1 = CurDAG->getTargetFrameIndex(FINode->getIndex(), + TLI.getPointerTy()); + SDValue Tmp2 = CurDAG->getTargetGlobalAddress(GV, TLI.getPointerTy()); + SDValue Ops[] = { Tmp1, Tmp2, Chain }; + return CurDAG->getTargetNode(TargetInstrInfo::DECLARE, dl, + MVT::Other, Ops, 3); + } + } + + return SelectCode(Op); +} + +bool ARMDAGToDAGISel:: +SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + + SDValue Base, Offset, Opc; + if (!SelectAddrMode2(Op, Op, Base, Offset, Opc)) + return true; + + OutOps.push_back(Base); + OutOps.push_back(Offset); + OutOps.push_back(Opc); + return false; +} + +/// createARMISelDag - This pass converts a legalized DAG into a +/// ARM-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) { + return new ARMDAGToDAGISel(TM); +} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp new file mode 100644 index 0000000..c0fd9dc --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -0,0 +1,2346 @@ +//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Instruction.h" +#include "llvm/Intrinsics.h" +#include "llvm/GlobalValue.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + +ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) + : TargetLowering(TM), ARMPCLabelIndex(0) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + if (Subtarget->isTargetDarwin()) { + // Uses VFP for Thumb libfuncs if available. + if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + // Single-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); + setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); + setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); + setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); + + // Double-precision floating-point arithmetic. + setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); + setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); + setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); + setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); + + // Single-precision comparisons. + setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); + setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); + setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); + setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); + setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); + setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); + setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); + setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); + + // Double-precision comparisons. 
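+      // Mirrors the single-precision block above, using the corresponding
+      // *df2vfp routines and the same SETNE / SETEQ result checks.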
+ setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); + setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); + setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); + setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); + setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); + setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); + setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); + setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); + + // Conversions between floating types. + setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); + + // Integer to floating-point conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: + // e.g., __floatunsidf vs. __floatunssidfvfp. + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); + } + } + + // These libcalls are not available in 32-bit. + setLibcallName(RTLIB::SHL_I128, 0); + setLibcallName(RTLIB::SRL_I128, 0); + setLibcallName(RTLIB::SRA_I128, 0); + + if (Subtarget->isThumb()) + addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + else + addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + addRegisterClass(MVT::f32, ARM::SPRRegisterClass); + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + } + computeRegisterProperties(); + + // ARM does not have f32 extending load. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // ARM does not have i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // ARM supports all 4 flavors of integer indexed load / store. + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } + + // i64 operation support. 
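+  // The Thumb mode supported here (Thumb1) has no long multiply, so all of
+  // these are expanded; ARM keeps the UMUL_LOHI / SMUL_LOHI nodes, which the
+  // instruction selector turns into UMULL / SMULL.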
+ if (Subtarget->isThumb()) { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + } else { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + } + setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SRL, MVT::i64, Custom); + setOperationAction(ISD::SRA, MVT::i64, Custom); + + // ARM does not have ROTL. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Expand); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + if (!Subtarget->hasV5TOps() || Subtarget->isThumb()) + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + + // Only ARMv6 has BSWAP. + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // These are expanded into libcalls. + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + // Support label based line numbers. + setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); + setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); + + setOperationAction(ISD::RET, MVT::Other, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + + // Use the default implementation. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + + if (!Subtarget->hasV6Ops()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + } + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) + // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); + + // We want to custom lower some of our intrinsics. 
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Expand); + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // We don't support sin/cos/fmod/copysign/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + } + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + + // int <-> fp are custom expanded into bit_convert + ARMISD ops. + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } + + // We have target-specific dag combine patterns for the following nodes: + // ARMISD::FMRRD - No need to call setTargetDAGCombine + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + + setStackPointerRegisterToSaveRestore(ARM::SP); + setSchedulingPreference(SchedulingForRegPressure); + setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10); + setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2); + + maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type + // Do not enable CodePlacementOpt for now: it currently runs after the + // ARMConstantIslandPass and messes up branch relaxation and placement + // of constant islands. 
+ // benefitFromCodePlacementOpt = true; +} + +const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; + case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tCALL: return "ARMISD::tCALL"; + case ARMISD::BRCOND: return "ARMISD::BRCOND"; + case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMPNZ: return "ARMISD::CMPNZ"; + case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + case ARMISD::CNEG: return "ARMISD::CNEG"; + + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; + case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; + case ARMISD::RRX: return "ARMISD::RRX"; + + case ARMISD::FMRRD: return "ARMISD::FMRRD"; + case ARMISD::FMDRR: return "ARMISD::FMDRR"; + + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + } +} + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC +static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { + switch (CC) { + default: assert(0 && "Unknown condition code!"); + case ISD::SETNE: return ARMCC::NE; + case ISD::SETEQ: return ARMCC::EQ; + case ISD::SETGT: return ARMCC::GT; + case ISD::SETGE: return ARMCC::GE; + case ISD::SETLT: return ARMCC::LT; + case ISD::SETLE: return ARMCC::LE; + case ISD::SETUGT: return ARMCC::HI; + case ISD::SETUGE: return ARMCC::HS; + case ISD::SETULT: return ARMCC::LO; + case ISD::SETULE: return ARMCC::LS; + } +} + +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. It +/// returns true if the operands should be inverted to form the proper +/// comparison. 
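+/// Some predicates need two checks: e.g. SETUEQ (unordered or equal) returns
+/// EQ with a second condition of VS in CondCode2, and SETONE returns MI plus
+/// GT, so callers may have to emit two predicated operations.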
+static bool FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + ARMCC::CondCodes &CondCode2) { + bool Invert = false; + CondCode2 = ARMCC::AL; + switch (CC) { + default: assert(0 && "Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = ARMCC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = ARMCC::GE; break; + case ISD::SETOLT: CondCode = ARMCC::MI; break; + case ISD::SETOLE: CondCode = ARMCC::GT; Invert = true; break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETO: CondCode = ARMCC::VC; break; + case ISD::SETUO: CondCode = ARMCC::VS; break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUGT: CondCode = ARMCC::HI; break; + case ISD::SETUGE: CondCode = ARMCC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = ARMCC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = ARMCC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = ARMCC::NE; break; + } + return Invert; +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +// +// The lower operations present on calling convention works on this order: +// LowerCALL (virt regs --> phys regs, virt regs --> stack) +// LowerFORMAL_ARGUMENTS (phys --> virt regs, stack --> virt regs) +// LowerRET (virt regs --> phys regs) +// LowerCALL (phys regs --> virt regs) +// +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +// APCS f64 is in register pairs, possibly split to stack +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const unsigned LoRegList[] = { ARM::R1, + ARM::R2, + ARM::R3, + ARM::NoRegister }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 4; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + if (LoRegList[i] != ARM::NoRegister) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + MVT::i32, LocInfo)); + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const 
unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + MVT::i32, LocInfo)); + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +/// LowerCallResult - Lower the result values of an ISD::CALL into the +/// appropriate copies out of appropriate physical registers. This assumes that +/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call +/// being lowered. The returns a SDNode with the same number of values as the +/// ISD::CALL. +SDNode *ARMTargetLowering:: +LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG) { + + DebugLoc dl = TheCall->getDebugLoc(); + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + bool isVarArg = TheCall->isVarArg(); + CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs); + CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM); + + SmallVector<SDValue, 8> ResultVals; + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val; + if (VA.needsCustom()) { + // Handle f64 as custom. + SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } else { + Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val); + break; + } + + ResultVals.push_back(Val); + } + + // Merge everything together with a MERGE_VALUES node. + ResultVals.push_back(Chain); + return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(), + &ResultVals[0], ResultVals.size()).getNode(); +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*AlwaysInline=*/false, NULL, 0, NULL, 0); +} + +/// LowerMemOpCallTo - Store the argument to the stack. 
+SDValue +ARMTargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, + const CCValAssign &VA, SDValue Chain, + SDValue Arg, ISD::ArgFlagsTy Flags) { + DebugLoc dl = TheCall->getDebugLoc(); + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) { + return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + } + return DAG.getStore(Chain, dl, Arg, PtrOff, + PseudoSourceValue::getStack(), LocMemOffset); +} + +/// LowerCALL - Lowering a ISD::CALL node into a callseq_start <- +/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter +/// nodes. +SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { + CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); + MVT RetVT = TheCall->getRetValType(0); + SDValue Chain = TheCall->getChain(); + unsigned CC = TheCall->getCallingConv(); + assert((CC == CallingConv::C || + CC == CallingConv::Fast) && "unknown calling convention"); + bool isVarArg = TheCall->isVarArg(); + SDValue Callee = TheCall->getCallee(); + DebugLoc dl = TheCall->getDebugLoc(); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeCallOperands(TheCall, CC_ARM); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32); + + SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. In the case + // of tail call optimization, arguments are handled later. + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = TheCall->getArg(realArgIdx); + ISD::ArgFlagsTy Flags = TheCall->getArgFlags(realArgIdx); + + // Promote the value if needed. 
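+    // Note: narrow integers are sign-, zero-, or any-extended to the location type; BCvt re-types a value in place, e.g. an f32 passed in a GPR.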
+ switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // f64 is passed in i32 pairs and must be combined + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(1))); + else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, fmrrd.getValue(1), + Flags)); + } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA, + Chain, Arg, Flags)); + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + bool isDirect = false; + bool isARMFunc = false; + bool isLocalARMFunc = false; + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + GlobalValue *GV = G->getGlobal(); + isDirect = true; + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + bool isStub = (isExt && Subtarget->isTargetDarwin()) && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && !isExt; + // tBX takes a register source operand. 
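+    // Pre-v5T Thumb has no BLX, so the callee's address is loaded from a constant-pool stub and the call is made indirectly through a register.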
+ if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + ARMCP::CPStub, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else + Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + isDirect = true; + bool isStub = Subtarget->isTargetDarwin() && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // tBX takes a register source operand. + const char *Sym = S->getSymbol(); + if (isARMFunc && Subtarget->isThumb() && !Subtarget->hasV5TOps()) { + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(Sym, ARMPCLabelIndex, + ARMCP::CPStub, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); + } + + // FIXME: handle tail calls differently. + unsigned CallOpc; + if (Subtarget->isThumb()) { + if (!Subtarget->hasV5TOps() && (!isDirect || isARMFunc)) + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + } else { + CallOpc = (isDirect || Subtarget->hasV5TOps()) + ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) + : ARMISD::CALL_NOLINK; + } + if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb()) { + // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK + Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); + InFlag = Chain.getValue(1); + } + + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), + &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (RetVT != MVT::Other) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. + return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG), + Op.getResNo()); +} + +SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { + // The chain is always operand #0 + SDValue Chain = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + + // CCValAssign - represent the assignment of the return value to a location. 
+ SmallVector<CCValAssign, 16> RVLocs; + unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + + // CCState - Info about the registers and stack slots. + CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs); + + // Analyze return values of ISD::RET. + CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + // ISD::RET => ret chain, (regnum1,val1), ... + // So i*2+1 index only the regnums + SDValue Arg = Op.getOperand(realRVLocIdx*2+1); + + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); + break; + } + + // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is + // available. + if (VA.needsCustom()) { + SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + + // Guarantee that all emitted copies are + // stuck together, avoiding something bad. + Flag = Chain.getValue(1); + } + + SDValue result; + if (Flag.getNode()) + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else // Return Void + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + + return result; +} + +// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as +// their target countpart wrapped in the ARMISD::Wrapper node. Suppose N is +// one of the above mentioned nodes. It has to be wrapped because otherwise +// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only +// be used to form addressing mode. These wrapped nodes will be selected +// into MOVi. +static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + SDValue Res; + if (CP->isMachineConstantPoolEntry()) + Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, + CP->getAlignment()); + else + Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, + CP->getAlignment()); + return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); +} + +// Lower ISD::GlobalTLSAddress using the "general dynamic" model +SDValue +ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + DebugLoc dl = GA->getDebugLoc(); + MVT PtrVT = getPointerTy(); + unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "tlsgd", true); + SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); + SDValue Chain = Argument.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + + // call __tls_get_addr. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Argument; + Entry.Ty = (const Type *) Type::Int32Ty; + Args.push_back(Entry); + // FIXME: is there useful debug info available here? + std::pair<SDValue, SDValue> CallResult = + LowerCallTo(Chain, (const Type *) Type::Int32Ty, false, false, false, false, + CallingConv::C, false, + DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + return CallResult.first; +} + +// Lower ISD::GlobalTLSAddress using the "initial exec" or +// "local exec" model. +SDValue +ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, + SelectionDAG &DAG) { + GlobalValue *GV = GA->getGlobal(); + DebugLoc dl = GA->getDebugLoc(); + SDValue Offset; + SDValue Chain = DAG.getEntryNode(); + MVT PtrVT = getPointerTy(); + // Get the Thread Pointer + SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + + if (GV->isDeclaration()){ + // initial exec model + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, + PCAdj, "gottpoff", true); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Chain = Offset.getValue(1); + + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } else { + // local exec model + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, "tpoff"); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + } + + // The address of the thread local variable is the add of the thread + // pointer with the offset of the variable. 
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); +} + +SDValue +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { + // TODO: implement the "local dynamic" model + assert(Subtarget->isTargetELF() && + "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + // If the relocation model is PIC, use the "General Dynamic" TLS Model, + // otherwise use the "Local Exec" TLS Model + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) + return LowerToTLSGeneralDynamicModel(GA, DAG); + else + return LowerToTLSExecModels(GA, DAG); +} + +SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + if (RelocM == Reloc::PIC_) { + bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); + ARMConstantPoolValue *CPV = + new ARMConstantPoolValue(GV, ARMCP::CPValue, UseGOTOFF ? "GOTOFF":"GOT"); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), + CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); + if (!UseGOTOFF) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + return Result; + } else { + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + } +} + +/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol +/// even in non-static mode. +static bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) { + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. + bool isDecl = GV->isDeclaration() && !GV->hasNotBeenReadFromBitcode(); + if (GV->hasHiddenVisibility() && (!isDecl && !GV->hasCommonLinkage())) + return false; + return RelocM != Reloc::Static && (isDecl || GV->isWeakForLinker()); +} + +SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, + SelectionDAG &DAG) { + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + bool IsIndirect = GVIsIndirectSymbol(GV, RelocM); + SDValue CPAddr; + if (RelocM == Reloc::Static) + CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + else { + unsigned PCAdj = (RelocM != Reloc::PIC_) + ? 0 : (Subtarget->isThumb() ? 4 : 8); + ARMCP::ARMCPKind Kind = IsIndirect ? 
ARMCP::CPNonLazyPtr + : ARMCP::CPValue; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, + Kind, PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + } + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Chain = Result.getValue(1); + + if (RelocM == Reloc::PIC_) { + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } + if (IsIndirect) + Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + + return Result; +} + +SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, + SelectionDAG &DAG){ + assert(Subtarget->isTargetELF() && + "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); + MVT PtrVT = getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue("_GLOBAL_OFFSET_TABLE_", + ARMPCLabelIndex, + ARMCP::CPValue, PCAdj); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); +} + +SDValue +ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + DebugLoc dl = Op.getDebugLoc(); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::arm_thread_pointer: + return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); + case Intrinsic::eh_sjlj_setjmp: + SDValue Res = DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, + Op.getOperand(1)); + return Res; + } +} + +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, + unsigned VarArgsFrameIndex) { + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. + DebugLoc dl = Op.getDebugLoc(); + MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); +} + +SDValue +ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + SDValue Root = Op.getOperand(0); + DebugLoc dl = Op.getDebugLoc(); + bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; + unsigned CC = MF.getFunction()->getCallingConv(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Assign locations to all of the incoming arguments. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs); + CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM); + + SmallVector<SDValue, 16> ArgValues; + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + + // Arguments stored in registers. 
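+    // An f64 argument arrives as a pair of i32 locations (two GPRs, or a GPR plus a stack slot under APCS) and is reassembled with FMDRR below.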
+ if (VA.isRegLoc()) { + MVT RegVT = VA.getLocVT(); + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + if (RegVT == MVT::f64) { + // f64 is passed in pairs of GPRs and must be combined. + RegVT = MVT::i32; + } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) + assert(0 && "RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments stored in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT); + + // f64 is passed in i32 pairs and must be combined. + if (VA.needsCustom()) { + SDValue ArgValue2; + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isMemLoc()) { + // must be APCS to split like this + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load node to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + } else { + Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); + } + + ArgValue = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, + ArgValue, ArgValue2); + } + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: assert(0 && "Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::ZExt: + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + } + + ArgValues.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); + + unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValues.push_back(DAG.getLoad(VA.getValVT(), dl, Root, FIN, NULL, 0)); + } + } + + // varargs + if (isVarArg) { + static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + + unsigned NumGPRs = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned VARegSize = (4 - NumGPRs) * 4; + unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); + unsigned ArgOffset = 0; + if (VARegSaveSize) { + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing + // the result of va_next. 
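+      // The save area sits at VarArgsFrameIndex; each remaining register of r0-r3 is stored out 4 bytes apart so the register and stack arguments form one contiguous block.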
+ AFI->setVarArgsRegSaveSize(VARegSaveSize); + ArgOffset = CCInfo.getNextStackOffset(); + VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + + VARegSaveSize - VARegSize); + SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + + SmallVector<SDValue, 4> MemOps; + for (; NumGPRs < 4; ++NumGPRs) { + TargetRegisterClass *RC; + if (AFI->isThumbFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); + SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + } + if (!MemOps.empty()) + Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else + // This will point to the next argument passed via stack. + VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); + } + + ArgValues.push_back(Root); + + // Return the new list of results. + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), + &ArgValues[0], ArgValues.size()).getValue(Op.getResNo()); +} + +/// isFloatingPointZero - Return true if this is +0.0. +static bool isFloatingPointZero(SDValue Op) { + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) + return CFP->getValueAPF().isPosZero(); + else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { + // Maybe this has already been legalized into the constant pool? + if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { + SDValue WrapperOp = Op.getOperand(1).getOperand(0); + if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) + if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + return CFP->getValueAPF().isPosZero(); + } + } + return false; +} + +static bool isLegalCmpImmediate(unsigned C, bool isThumb) { + return ( isThumb && (C & ~255U) == 0) || + (!isThumb && ARM_AM::getSOImmVal(C) != -1); +} + +/// Returns appropriate ARM CMP (cmp) and corresponding condition code for +/// the given operands. +static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, bool isThumb, + DebugLoc dl) { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + unsigned C = RHSC->getZExtValue(); + if (!isLegalCmpImmediate(C, isThumb)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: break; + case ISD::SETLT: + case ISD::SETGE: + if (isLegalCmpImmediate(C-1, isThumb)) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + RHS = DAG.getConstant(C-1, MVT::i32); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if (C > 0 && isLegalCmpImmediate(C-1, isThumb)) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + RHS = DAG.getConstant(C-1, MVT::i32); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if (isLegalCmpImmediate(C+1, isThumb)) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + RHS = DAG.getConstant(C+1, MVT::i32); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb)) { + CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; + RHS = DAG.getConstant(C+1, MVT::i32); + } + break; + } + } + } + + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + ARMISD::NodeType CompareType; + switch (CondCode) { + default: + CompareType = ARMISD::CMP; + break; + case ARMCC::EQ: + case ARMCC::NE: + case ARMCC::MI: + case ARMCC::PL: + // Uses only N and Z Flags + CompareType = ARMISD::CMPNZ; + break; + } + ARMCC = DAG.getConstant(CondCode, MVT::i32); + return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); +} + +/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. +static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, + DebugLoc dl) { + SDValue Cmp; + if (!isFloatingPointZero(RHS)) + Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); + else + Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); +} + +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + MVT VT = Op.getValueType(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); + SDValue TrueVal = Op.getOperand(2); + SDValue FalseVal = Op.getOperand(3); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); + } + + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + std::swap(TrueVal, FalseVal); + + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, + ARMCC, CCR, Cmp); + if (CondCode2 != ARMCC::AL) { + SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); + // FIXME: Needs another CMP because flag can have but one use. + SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + Result = DAG.getNode(ARMISD::CMOV, dl, VT, + Result, TrueVal, ARMCC2, CCR, Cmp2); + } + return Result; +} + +static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + if (LHS.getValueType() == MVT::i32) { + SDValue ARMCC; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb(), dl); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, + Chain, Dest, ARMCC, CCR,Cmp); + } + + assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + ARMCC::CondCodes CondCode, CondCode2; + if (FPCCToARMCC(CC, CondCode, CondCode2)) + // Swap the LHS/RHS of the comparison if needed. 
+ std::swap(LHS, RHS); + + SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); + SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + if (CondCode2 != ARMCC::AL) { + ARMCC = DAG.getConstant(CondCode2, MVT::i32); + SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) }; + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + } + return Res; +} + +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { + SDValue Chain = Op.getOperand(0); + SDValue Table = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + MVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + bool isPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + Addr = DAG.getLoad(isPIC ? (MVT)MVT::i32 : PTy, dl, + Chain, Addr, NULL, 0); + Chain = Addr.getValue(1); + if (isPIC) + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); +} + +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::FP_TO_SINT ? ARMISD::FTOSI : ARMISD::FTOUI; + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); +} + +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc = + Op.getOpcode() == ISD::SINT_TO_FP ? ARMISD::SITOF : ARMISD::UITOF; + + Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + +static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { + // Implement fcopysign with a fabs and a conditional fneg. + SDValue Tmp0 = Op.getOperand(0); + SDValue Tmp1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + MVT VT = Op.getValueType(); + MVT SrcVT = Tmp1.getValueType(); + SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); + SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); + SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); +} + +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + MVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces()) + ? 
ARM::R7 : ARM::R11; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0); + return FrameAddr; +} + +SDValue +ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff){ + // Do repeated 4-byte loads and stores. To be improved. + // This requires 4-byte alignment. + if ((Align & 3) != 0) + return SDValue(); + // This requires the copy size to be a constant, preferrably + // within a subtarget-specific limit. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (!ConstantSize) + return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold()) + return SDValue(); + + unsigned BytesLeft = SizeVal & 3; + unsigned NumMemOps = SizeVal >> 2; + unsigned EmittedNumMemOps = 0; + MVT VT = MVT::i32; + unsigned VTSize = 4; + unsigned i = 0; + const unsigned MAX_LOADS_IN_LDM = 6; + SDValue TFOps[MAX_LOADS_IN_LDM]; + SDValue Loads[MAX_LOADS_IN_LDM]; + uint64_t SrcOff = 0, DstOff = 0; + + // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the + // same number of stores. The loads and stores will get combined into + // ldm/stm later on. + while (EmittedNumMemOps < NumMemOps) { + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff); + TFOps[i] = Loads[i].getValue(1); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + for (i = 0; + i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff); + DstOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + EmittedNumMemOps += i; + } + + if (BytesLeft == 0) + return Chain; + + // Issue loads / stores for the trailing (1 - 3) bytes. + unsigned BytesLeftSave = BytesLeft; + i = 0; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + Loads[i] = DAG.getLoad(VT, dl, Chain, + DAG.getNode(ISD::ADD, dl, MVT::i32, Src, + DAG.getConstant(SrcOff, MVT::i32)), + SrcSV, SrcSVOff + SrcOff); + TFOps[i] = Loads[i].getValue(1); + ++i; + SrcOff += VTSize; + BytesLeft -= VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + + i = 0; + BytesLeft = BytesLeftSave; + while (BytesLeft) { + if (BytesLeft >= 2) { + VT = MVT::i16; + VTSize = 2; + } else { + VT = MVT::i8; + VTSize = 1; + } + + TFOps[i] = DAG.getStore(Chain, dl, Loads[i], + DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, + DAG.getConstant(DstOff, MVT::i32)), + DstSV, DstSVOff + DstOff); + ++i; + DstOff += VTSize; + BytesLeft -= VTSize; + } + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + DebugLoc dl = N->getDebugLoc(); + if (N->getValueType(0) == MVT::f64) { + // Turn i64->f64 into FMDRR. 
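+    // The i64 operand is first split into its low and high i32 halves, which FMDRR then packs into a single f64.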
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + } + + // Turn f64->i64 into FMRRD. + SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); +} + +static SDValue ExpandSRx(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { + assert(N->getValueType(0) == MVT::i64 && + (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + "Unknown shift to lower!"); + + // We only lower SRA, SRL of 1 here, all others use generic lowering. + if (!isa<ConstantSDNode>(N->getOperand(1)) || + cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) + return SDValue(); + + // If we are in thumb mode, we don't have RRX. + if (ST->isThumb()) return SDValue(); + + // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. + DebugLoc dl = N->getDebugLoc(); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), + DAG.getConstant(1, MVT::i32)); + + // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and + // captures the result into a carry flag. + unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); + + // The low part is an ARMISD::RRX operand, which shifts the carry in. + Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); + + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); +} + +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + default: assert(0 && "Don't know how to custom lower this!"); abort(); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::GlobalAddress: + return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : + LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::CALL: return LowerCALL(Op, DAG); + case ISD::RET: return LowerRET(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); + case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG); + case ISD::RETURNADDR: break; + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); + case ISD::SRL: + case ISD::SRA: return ExpandSRx(Op.getNode(), DAG,Subtarget); + } + return SDValue(); +} + +/// ReplaceNodeResults - Replace the results of node with an illegal result +/// type with new values built out of custom code. 
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) { + switch (N->getOpcode()) { + default: + assert(0 && "Don't know how to custom expand this!"); + return; + case ISD::BIT_CONVERT: + Results.push_back(ExpandBIT_CONVERT(N, DAG)); + return; + case ISD::SRL: + case ISD::SRA: { + SDValue Res = ExpandSRx(N, DAG, Subtarget); + if (Res.getNode()) + Results.push_back(Res); + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Scheduler Hooks +//===----------------------------------------------------------------------===// + +MachineBasicBlock * +ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + switch (MI->getOpcode()) { + default: assert(false && "Unexpected instr type to insert"); + case ARM::tMOVCCr: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + // Update machine-CFG edges by first adding all successors of the current + // block to the new block which will contain the Phi node for the select. + for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), + e = BB->succ_end(); i != e; ++i) + sinkMBB->addSuccessor(*i); + // Next, remove all successors of the current block, and add the true + // and fallthrough blocks as its successors. + while(!BB->succ_empty()) + BB->removeSuccessor(BB->succ_begin()); + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Optimization Hooks +//===----------------------------------------------------------------------===// + +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + MVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2); + ISD::CondCode CC = ISD::SETCC_INVALID; + + if (isSlctCC) { + CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); + } else { + SDValue CCOp = Slct.getOperand(0); + if (CCOp.getOpcode() == ISD::SETCC) + CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); + } + + bool DoXform = false; + bool InvCC = false; + assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && + "Bad input!"); + + if (LHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(LHS)->isNullValue()) { + DoXform = true; + } else if (CC != ISD::SETCC_INVALID && + RHS.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(RHS)->isNullValue()) { + std::swap(LHS, RHS); + SDValue Op0 = Slct.getOperand(0); + MVT OpVT = isSlctCC ? Op0.getValueType() : + Op0.getOperand(0).getValueType(); + bool isInt = OpVT.isInteger(); + CC = ISD::getSetCCInverse(CC, isInt); + + if (!TLI.isCondCodeLegal(CC, OpVT)) + return SDValue(); // Inverse operator isn't legal. + + DoXform = true; + InvCC = true; + } + + if (DoXform) { + SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); + if (isSlctCC) + return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, + Slct.getOperand(0), Slct.getOperand(1), CC); + SDValue CCOp = Slct.getOperand(0); + if (InvCC) + CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), + CCOp.getOperand(0), CCOp.getOperand(1), CC); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + CCOp, OtherOp, Result); + } + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) + if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N0, N1, DCI); + if (Result.getNode()) return Result; + } + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + +/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. +static SDValue PerformSUBCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // added by evan in r37685 with no testcase. + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + + // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) + if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { + SDValue Result = combineSelectAndUse(N, N1, N0, DCI); + if (Result.getNode()) return Result; + } + + return SDValue(); +} + + +/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. 
+static SDValue PerformFMRRDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // fmrrd(fmdrr x, y) -> x,y + SDValue InDouble = N->getOperand(0); + if (InDouble.getOpcode() == ARMISD::FMDRR) + return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + return SDValue(); +} + +SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); + case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + } + + return SDValue(); +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. +static bool isLegalAddressImmediate(int64_t V, MVT VT, + const ARMSubtarget *Subtarget) { + if (V == 0) + return true; + + if (!VT.isSimple()) + return false; + + if (Subtarget->isThumb()) { + if (V < 0) + return false; + + unsigned Scale = 1; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + // Scale == 1; + break; + case MVT::i16: + // Scale == 2; + Scale = 2; + break; + case MVT::i32: + // Scale == 4; + Scale = 4; + break; + } + + if ((V & (Scale - 1)) != 0) + return false; + V /= Scale; + return V == (V & ((1LL << 5) - 1)); + } + + if (V < 0) + V = - V; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + // +- imm12 + return V == (V & ((1LL << 12) - 1)); + case MVT::i16: + // +- imm8 + return V == (V & ((1LL << 8) - 1)); + case MVT::f32: + case MVT::f64: + if (!Subtarget->hasVFP2()) + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + MVT VT = getValueType(Ty, true); + if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) + return false; + + // Can never fold addr of global into load/store. + if (AM.BaseGV) + return false; + + switch (AM.Scale) { + case 0: // no scale reg, must be "r+i" or "r", or "i". + break; + case 1: + if (Subtarget->isThumb()) + return false; + // FALL THROUGH. + default: + // ARM doesn't support any R+R*scale+imm addr modes. + if (AM.BaseOffs) + return false; + + if (!VT.isSimple()) + return false; + + int Scale = AM.Scale; + switch (VT.getSimpleVT()) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + case MVT::i64: + // This assumes i64 is legalized to a pair of i32. If not (i.e. + // ldrd / strd are used, then its address mode is same as i16. + // r + r + if (Scale < 0) Scale = -Scale; + if (Scale == 1) + return true; + // r + r << imm + return isPowerOf2_32(Scale & ~1); + case MVT::i16: + // r + r + if (((unsigned)AM.HasBaseReg + Scale) <= 2) + return true; + return false; + + case MVT::isVoid: + // Note, we allow "void" uses (basically, uses that aren't loads or + // stores), because arm allows folding a scale into many arithmetic + // operations. This should be made more precise and revisited later. + + // Allow r << imm, but the imm has to be a multiple of two. 
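+      // That is, the scale must be a power of two greater than one, i.e. a genuine left shift by at least one bit.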
+ if (AM.Scale & 1) return false; + return isPowerOf2_32(AM.Scale); + } + break; + } + return true; +} + +static bool getIndexedAddressParts(SDNode *Ptr, MVT VT, + bool isSEXTLoad, SDValue &Base, + SDValue &Offset, bool &isInc, + SelectionDAG &DAG) { + if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) + return false; + + if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { + // AddressingMode 3 + Base = Ptr->getOperand(0); + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -256) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + return true; + } + } + isInc = (Ptr->getOpcode() == ISD::ADD); + Offset = Ptr->getOperand(1); + return true; + } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { + // AddressingMode 2 + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC < 0 && RHSC > -0x1000) { + isInc = false; + Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); + Base = Ptr->getOperand(0); + return true; + } + } + + if (Ptr->getOpcode() == ISD::ADD) { + isInc = true; + ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + Base = Ptr->getOperand(1); + Offset = Ptr->getOperand(0); + } else { + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + } + return true; + } + + isInc = (Ptr->getOpcode() == ISD::ADD); + Base = Ptr->getOperand(0); + Offset = Ptr->getOperand(1); + return true; + } + + // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + return false; +} + +/// getPreIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if the node's address +/// can be legally represented as pre-indexed load / store address. +bool +ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + Ptr = LD->getBasePtr(); + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + Ptr = ST->getBasePtr(); + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; + return true; + } + return false; +} + +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. +bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + if (Subtarget->isThumb()) + return false; + + MVT VT; + SDValue Ptr; + bool isSEXTLoad = false; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + VT = LD->getMemoryVT(); + isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + VT = ST->getMemoryVT(); + } else + return false; + + bool isInc; + bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, + isInc, DAG); + if (isLegal) { + AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; + return true; + } + return false; +} + +void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case ARMISD::CMOV: { + // Bits are known zero/one if known on the LHS and RHS. + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + if (KnownZero == 0 && KnownOne == 0) return; + + APInt KnownZeroRHS, KnownOneRHS; + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, + KnownZeroRHS, KnownOneRHS, Depth+1); + KnownZero &= KnownZeroRHS; + KnownOne &= KnownOneRHS; + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +ARMTargetLowering::ConstraintType +ARMTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'l': return C_RegisterClass; + case 'w': return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +std::pair<unsigned, const TargetRegisterClass*> +ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'l': + if (Subtarget->isThumb()) + return std::make_pair(0U, ARM::tGPRRegisterClass); + else + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'r': + return std::make_pair(0U, ARM::GPRRegisterClass); + case 'w': + if (VT == MVT::f32) + return std::make_pair(0U, ARM::SPRRegisterClass); + if (VT == MVT::f64) + return std::make_pair(0U, ARM::DPRRegisterClass); + break; + } + } + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +std::vector<unsigned> ARMTargetLowering:: +getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const { + if (Constraint.size() != 1) + return std::vector<unsigned>(); + + switch (Constraint[0]) { // GCC ARM Constraint Letters + default: break; + case 'l': + return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + 0); + case 'r': + return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, ARM::R11, + ARM::R12, ARM::LR, 0); + case 'w': + if (VT == MVT::f32) + return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12,ARM::S13,ARM::S14,ARM::S15, + ARM::S16,ARM::S17,ARM::S18,ARM::S19, + ARM::S20,ARM::S21,ARM::S22,ARM::S23, + ARM::S24,ARM::S25,ARM::S26,ARM::S27, + ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); + if (VT == MVT::f64) + return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7, + ARM::D8, ARM::D9, ARM::D10,ARM::D11, + ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); + break; + } + + return std::vector<unsigned>(); +} + +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. 
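+/// For instance, with the Thumb 'I' case handled below, an immediate of 200 is accepted +/// (it lies in [0,255]) while 300 is not, so nothing is pushed onto Ops for it.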
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + char Constraint, + bool hasMemory, + std::vector<SDValue>&Ops, + SelectionDAG &DAG) const { + SDValue Result(0, 0); + + switch (Constraint) { + default: break; + case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) + return; + + int64_t CVal64 = C->getSExtValue(); + int CVal = (int) CVal64; + // None of these constraints allow values larger than 32 bits. Check + // that the value fits in an int. + if (CVal != CVal64) + return; + + switch (Constraint) { + case 'I': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 255, for ADD immediates. + if (CVal >= 0 && CVal <= 255) + break; + } else { + // A constant that can be used as an immediate value in a + // data-processing instruction. + if (ARM_AM::getSOImmVal(CVal) != -1) + break; + } + return; + + case 'J': + if (Subtarget->isThumb()) { + // This must be a constant between -255 and -1, for negated ADD + // immediates. This can be used in GCC with an "n" modifier that + // prints the negated value, for use with SUB instructions. It is + // not useful otherwise but is implemented for compatibility. + if (CVal >= -255 && CVal <= -1) + break; + } else { + // This must be a constant between -4095 and 4095. It is not clear + // what this constraint is intended for. Implemented for + // compatibility with GCC. + if (CVal >= -4095 && CVal <= 4095) + break; + } + return; + + case 'K': + if (Subtarget->isThumb()) { + // A 32-bit value where only one byte has a nonzero value. Exclude + // zero to match GCC. This constraint is used by GCC internally for + // constants that can be loaded with a move/shift combination. + // It is not useful otherwise but is implemented for compatibility. + if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) + break; + } else { + // A constant whose bitwise inverse can be used as an immediate + // value in a data-processing instruction. This can be used in GCC + // with a "B" modifier that prints the inverted value, for use with + // BIC and MVN instructions. It is not useful otherwise but is + // implemented for compatibility. + if (ARM_AM::getSOImmVal(~CVal) != -1) + break; + } + return; + + case 'L': + if (Subtarget->isThumb()) { + // This must be a constant between -7 and 7, + // for 3-operand ADD/SUB immediate instructions. + if (CVal >= -7 && CVal < 7) + break; + } else { + // A constant whose negation can be used as an immediate value in a + // data-processing instruction. This can be used in GCC with an "n" + // modifier that prints the negated value, for use with SUB + // instructions. It is not useful otherwise but is implemented for + // compatibility. + if (ARM_AM::getSOImmVal(-CVal) != -1) + break; + } + return; + + case 'M': + if (Subtarget->isThumb()) { + // This must be a multiple of 4 between 0 and 1020, for + // ADD sp + immediate. + if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) + break; + } else { + // A power of two or a constant between 0 and 32. This is used in + // GCC for the shift amount on shifted register operands, but it is + // useful in general for any shift amounts. + if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) + break; + } + return; + + case 'N': + if (Subtarget->isThumb()) { + // This must be a constant between 0 and 31, for shift amounts. 
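+        // (There is no ARM-mode alternative for 'N'; a non-Thumb target falls through to +        // the return below and the constraint is rejected.)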
+ if (CVal >= 0 && CVal <= 31) + break; + } + return; + + case 'O': + if (Subtarget->isThumb()) { + // This must be a multiple of 4 between -508 and 508, for + // ADD/SUB sp = sp + immediate. + if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) + break; + } + return; + } + Result = DAG.getTargetConstant(CVal, Op.getValueType()); + break; + } + + if (Result.getNode()) { + Ops.push_back(Result); + return; + } + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, + Ops, DAG); +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h new file mode 100644 index 0000000..2dab2db --- /dev/null +++ b/lib/Target/ARM/ARMISelLowering.h @@ -0,0 +1,184 @@ +//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that ARM uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMISELLOWERING_H +#define ARMISELLOWERING_H + +#include "ARMSubtarget.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include <vector> + +namespace llvm { + class ARMConstantPoolValue; + + namespace ARMISD { + // ARM Specific DAG Nodes + enum NodeType { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + Wrapper, // Wrapper - A wrapper node for TargetConstantPool, + // TargetExternalSymbol, and TargetGlobalAddress. + WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable + + CALL, // Function call. + CALL_PRED, // Function call that's predicable. + CALL_NOLINK, // Function call with branch not branch-and-link. + tCALL, // Thumb function call. + BRCOND, // Conditional branch. + BR_JT, // Jumptable branch. + RET_FLAG, // Return with a flag operand. + + PIC_ADD, // Add with a PC operand and a PIC label. + + CMP, // ARM compare instructions. + CMPNZ, // ARM compare that uses only N or Z flags. + CMPFP, // ARM VFP compare instruction, sets FPSCR. + CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. + FMSTAT, // ARM fmstat instruction. + CMOV, // ARM conditional move instructions. + CNEG, // ARM conditional negate instructions. + + FTOSI, // FP to sint within a FP register. + FTOUI, // FP to uint within a FP register. + SITOF, // sint to FP within a FP register. + UITOF, // uint to FP within a FP register. + + SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. + SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. + RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. + + FMRRD, // double to two gprs. + FMDRR, // Two gprs to double. + + EH_SJLJ_SETJMP, // SjLj exception handling setjmp + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp + + THREAD_POINTER + }; + } + + //===--------------------------------------------------------------------===// + // ARMTargetLowering - ARM Implementation of the TargetLowering interface + + class ARMTargetLowering : public TargetLowering { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. 
+ public: + explicit ARMTargetLowering(TargetMachine &TM); + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. + /// + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG); + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual const char *getTargetNodeName(unsigned Opcode) const; + + virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; + + /// getPreIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if the node's address + /// can be legally represented as pre-indexed load / store address. + virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + /// getPostIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if this node can be + /// combined with a load / store to form a post-indexed load / store. + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const; + ConstraintType getConstraintType(const std::string &Constraint) const; + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + std::vector<unsigned> + getRegClassForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. If hasMemory is + /// true it means one of the asm constraint of the inline asm instruction + /// being processed is 'm'. + virtual void LowerAsmOperandForConstraint(SDValue Op, + char ConstraintLetter, + bool hasMemory, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + virtual const ARMSubtarget* getSubtarget() { + return Subtarget; + } + + private: + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + + /// ARMPCLabelIndex - Keep track the number of ARM PC labels created. 
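+    /// (a fresh id is handed out for each PC-relative label so that every label is unique).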
+ /// + unsigned ARMPCLabelIndex; + + SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG, + const SDValue &StackPtr, const CCValAssign &VA, + SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags); + SDNode *LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall, + unsigned CallingConv, SelectionDAG &DAG); + SDValue LowerCALL(SDValue Op, SelectionDAG &DAG); + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerRET(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, + SelectionDAG &DAG); + SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, + SelectionDAG &DAG); + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); + SDValue LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, + SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, + bool AlwaysInline, + const Value *DstSV, uint64_t DstSVOff, + const Value *SrcSV, uint64_t SrcSVOff); + }; +} + +#endif // ARMISELLOWERING_H diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td new file mode 100644 index 0000000..9a1e1c2 --- /dev/null +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -0,0 +1,868 @@ +//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// ARM Instruction Format Definitions. +// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format<bits<5> val> { + bits<5> Value = val; +} + +def Pseudo : Format<0>; +def MulFrm : Format<1>; +def BrFrm : Format<2>; +def BrMiscFrm : Format<3>; + +def DPFrm : Format<4>; +def DPSoRegFrm : Format<5>; + +def LdFrm : Format<6>; +def StFrm : Format<7>; +def LdMiscFrm : Format<8>; +def StMiscFrm : Format<9>; +def LdStMulFrm : Format<10>; + +def ArithMiscFrm : Format<11>; +def ExtFrm : Format<12>; + +def VFPUnaryFrm : Format<13>; +def VFPBinaryFrm : Format<14>; +def VFPConv1Frm : Format<15>; +def VFPConv2Frm : Format<16>; +def VFPConv3Frm : Format<17>; +def VFPConv4Frm : Format<18>; +def VFPConv5Frm : Format<19>; +def VFPLdStFrm : Format<20>; +def VFPLdStMulFrm : Format<21>; +def VFPMiscFrm : Format<22>; + +def ThumbFrm : Format<23>; + +// Misc flag for data processing instructions that indicates whether +// the instruction has a Rn register operand. +class UnaryDP { bit isUnaryDataProc = 1; } + +//===----------------------------------------------------------------------===// + +// ARM Instruction templates. 
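+// InstARM is the common base class: it records the addressing mode, size, index mode and +// format in the TSFlags fields used by the machine code emitter. The I class below appends +// the predicate operand, sI additionally appends the optional cc_out operand, XI adds +// neither, and the A*/AX* families then fix the size and addressing mode per encoding group.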
+// + +class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, + Format f, string cstr> + : Instruction { + field bits<32> Inst; + + let Namespace = "ARM"; + + // TSFlagsFields + AddrMode AM = am; + bits<4> AddrModeBits = AM.Value; + + SizeFlagVal SZ = sz; + bits<3> SizeFlag = SZ.Value; + + IndexMode IM = im; + bits<2> IndexModeBits = IM.Value; + + Format F = f; + bits<5> Form = F.Value; + + // + // Attributes specific to ARM instructions... + // + bit isUnaryDataProc = 0; + + let Constraints = cstr; +} + +class PseudoInst<dag oops, dag iops, string asm, list<dag> pattern> + : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, ""> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; +} + +// Almost all ARM instructions are predicable. +class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(opc, !strconcat("${p}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +// Same as I except it can optionally modify CPSR. Note it's modeled as +// an input operand since by default it's a zero register. It will +// become an implicit def once it's "flipped". +class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string opc, string asm, string cstr, + list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +// Special cases +class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, + IndexMode im, Format f, string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, im, f, cstr> { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsARM]; +} + +class AI<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AsI<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AXI<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, asm, + "", pattern>; + +// Ctrl flow instructions +class ABI<bits<4> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, opc, + asm, "", pattern> { + let Inst{27-24} = opcod; +} +class ABXI<bits<4> opcod, dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, asm, + "", pattern> { + let Inst{27-24} = opcod; +} +class ABXIx2<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, asm, + "", pattern>; + +// BR_JT instructions +class JTI<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, + asm, "", pattern>; + +// addrmode1 instructions +class AI1<bits<4> opcod, dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, 
AddrMode1, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AsI1<bits<4> opcod, dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AXI1<bits<4> opcod, dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{24-21} = opcod; + let Inst{27-26} = {0,0}; +} +class AI1x2<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, opc, + asm, "", pattern>; + + +// addrmode2 loads and stores +class AI2<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{27-26} = {0,1}; +} + +// loads +class AI2ldw<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldw<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2ldb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// stores +class AI2stw<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stw<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AXI2stb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// 
Pre-indexed loads +class AI2ldwpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Pre-indexed stores +class AI2stwpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed loads +class AI2ldwpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2ldbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// Post-indexed stores +class AI2stwpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} +class AI2stbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode2, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 1; // B bit + let Inst{24} = 0; // P bit + let Inst{27-26} = {0,1}; +} + +// addrmode3 instructions +class AI3<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern>; +class AXI3<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, asm, + "", pattern>; + +// loads +class AI3ldh<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldh<dag oops, dag iops, 
Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsh<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsh<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsb<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3ldsb<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldd<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + +// stores +class AI3sth<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AXI3sth<dag oops, dag iops, Format f, string asm, + list<dag> pattern> + : XI<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} +class AI3std<dag oops, dag iops, Format f, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, opc, + asm, "", pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 0; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed loads +class AI3ldhpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} 
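+// (AI3ldshpr and AI3ldsbpr below differ from AI3ldhpr only in the S and H bits, giving the +// sign-extended halfword and sign-extended byte pre-indexed loads respectively.)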
+class AI3ldshpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} +class AI3ldsbpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Pre-indexed stores +class AI3sthpr<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, opc, + asm, cstr, pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 1; // P bit +} + +// Post-indexed loads +class AI3ldhpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldshpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} +class AI3ldsbpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 0; // H bit + let Inst{6} = 1; // S bit + let Inst{7} = 1; + let Inst{20} = 1; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + +// Post-indexed stores +class AI3sthpo<dag oops, dag iops, Format f, string opc, + string asm, string cstr, list<dag> pattern> + : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, opc, + asm, cstr,pattern> { + let Inst{4} = 1; + let Inst{5} = 1; // H bit + let Inst{6} = 0; // S bit + let Inst{7} = 1; + let Inst{20} = 0; // L bit + let Inst{21} = 1; // W bit + let Inst{24} = 0; // P bit +} + + +// addrmode4 instructions +class AXI4ld<dag oops, dag iops, Format f, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{20} = 1; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} +class AXI4st<dag oops, dag iops, Format f, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode4, Size4Bytes, IndexModeNone, f, asm, + "", pattern> { + let Inst{20} = 0; // L bit + let Inst{22} = 0; // S bit + let Inst{27-25} = 0b100; +} + +// Unsigned multiply, multiply-accumulate instructions. 
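+// (AMul1I fixes bits 7-4 to 0b1001, the multiply encoding group, and hard-codes S = 0; +// AsMul1I does not set the S bit, since its sI base class carries the optional cc_out operand.)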
+class AMul1I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{20} = 0; // S bit + let Inst{27-21} = opcod; +} +class AsMul1I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{27-21} = opcod; +} + +// Most significant word multiply +class AMul2I<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b1001; + let Inst{20} = 1; + let Inst{27-21} = opcod; +} + +// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y> +class AMulxyI<bits<7> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, opc, + asm, "", pattern> { + let Inst{4} = 0; + let Inst{7} = 1; + let Inst{20} = 0; + let Inst{27-21} = opcod; +} + +// Extend instructions. +class AExtI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, opc, + asm, "", pattern> { + let Inst{7-4} = 0b0111; + let Inst{27-20} = opcod; +} + +// Misc Arithmetic instructions. +class AMiscA1I<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, opc, + asm, "", pattern> { + let Inst{27-20} = opcod; +} + +//===----------------------------------------------------------------------===// + +// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode. +class ARMPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM]; +} +class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV5TE]; +} +class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsARM, HasV6]; +} + +//===----------------------------------------------------------------------===// +// +// Thumb Instruction Format Definitions. +// + + +// TI - Thumb instruction. 
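+// ThumbI carries the Thumb addressing mode and size; the TI/TI1/TI2/TI4/TIs aliases below +// fix the addressing mode (AddrModeT1/T2/T4/Ts), TIt marks the two-address forms, and TIx2 +// is the 4-byte form used for the BL/BLX pairs.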
+ +class ThumbI<dag outs, dag ins, AddrMode am, SizeFlagVal sz, + string asm, string cstr, list<dag> pattern> + : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> { + let OutOperandList = outs; + let InOperandList = ins; + let AsmString = asm; + let Pattern = pattern; + list<Predicate> Predicates = [IsThumb]; +} + +class TI<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>; +class TI1<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT1, Size2Bytes, asm, "", pattern>; +class TI2<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT2, Size2Bytes, asm, "", pattern>; +class TI4<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeT4, Size2Bytes, asm, "", pattern>; +class TIs<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeTs, Size2Bytes, asm, "", pattern>; + +// Two-address instructions +class TIt<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>; + +// BL, BLX(1) are translated by assembler into two instructions +class TIx2<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>; + +// BR_JT instructions +class TJTI<dag outs, dag ins, string asm, list<dag> pattern> + : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>; + + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM VFP Instruction templates. +// + +// ARM VFP addrmode5 loads and stores +class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStFrm, opc, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1011; +} + +class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : I<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStFrm, opc, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-8} = 0b1010; +} + +// Load / store multiple +class AXSI5<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStMulFrm, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; +} + +class AXDI5<dag oops, dag iops, string asm, list<dag> pattern> + : XI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone, + VFPLdStMulFrm, asm, "", pattern> { + // TODO: Mark the instructions with the appropriate subtarget info. 
+ let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1010; +} + + +// Double precision, unary +class ADuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1011; + let Inst{7-4} = opcod3; +} + +// Double precision, binary +class ADbI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> { + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1011; +} + +// Single precision, unary +class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPUnaryFrm, opc, asm, pattern> { + // Bits 22 (D bit) and 5 (M bit) will be changed during instruction encoding. + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = 0b1010; + let Inst{7-4} = opcod3; +} + +// Single precision, binary +class ASbI<bits<8> opcod, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AI<oops, iops, VFPBinaryFrm, opc, asm, pattern> { + // Bit 22 (D bit) can be changed during instruction encoding. + let Inst{27-20} = opcod; + let Inst{11-8} = 0b1010; +} + +// VFP conversion instructions +class AVConv1I<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, + dag oops, dag iops, string opc, string asm, list<dag> pattern> + : AI<oops, iops, VFPConv1Frm, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{19-16} = opcod2; + let Inst{11-8} = opcod3; + let Inst{6} = 1; +} + +class AVConvXI<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, Format f, + string opc, string asm, list<dag> pattern> + : AI<oops, iops, f, opc, asm, pattern> { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{4} = 1; +} + +class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, opc, asm, pattern>; + +class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, opc, asm, pattern>; + +class AVConv4I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv4Frm, opc, asm, pattern>; + +class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, string opc, + string asm, list<dag> pattern> + : AVConvXI<opcod1, opcod2, oops, iops, VFPConv5Frm, opc, asm, pattern>; + +//===----------------------------------------------------------------------===// + + +// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode. +class ThumbPat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb]; +} + +class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> { + list<Predicate> Predicates = [IsThumb, HasV5T]; +} diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp new file mode 100644 index 0000000..4b0dbb5 --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -0,0 +1,1025 @@ +//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +static cl::opt<bool> +EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, + cl::desc("Enable ARM 2-addr to 3-addr conv")); + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + RI(*this, STI) { +} + + +/// Return true if the instruction is a register to register move and +/// leave the source and dest operands in the passed parameters. +/// +bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned& SrcSubIdx, unsigned& DstSubIdx) const { + SrcSubIdx = DstSubIdx = 0; // No sub-registers. + + unsigned oc = MI.getOpcode(); + switch (oc) { + default: + return false; + case ARM::FCPYS: + case ARM::FCPYD: + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + case ARM::MOVr: + case ARM::tMOVr: + case ARM::tMOVhir2lor: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2hir: + assert(MI.getDesc().getNumOperands() >= 2 && + MI.getOperand(0).isReg() && + MI.getOperand(1).isReg() && + "Invalid ARM MOV instruction"); + SrcReg = MI.getOperand(1).getReg(); + DstReg = MI.getOperand(0).getReg(); + return true; + } +} + +unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::LDR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FLDD: + case ARM::FLDS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tRestore: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STR: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::FSTD: + case ARM::FSTS: + if (MI->getOperand(1).isFI() && + 
MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::tSpill: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + return 0; +} + +void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + const MachineInstr *Orig) const { + DebugLoc dl = Orig->getDebugLoc(); + if (Orig->getOpcode() == ARM::MOVi2pieces) { + RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(), + Orig->getOperand(2).getImm(), + Orig->getOperand(3).getReg(), this, false, dl); + return; + } + + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); +} + +static unsigned getUnindexedOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case ARM::LDR_PRE: + case ARM::LDR_POST: + return ARM::LDR; + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + return ARM::LDRH; + case ARM::LDRB_PRE: + case ARM::LDRB_POST: + return ARM::LDRB; + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + return ARM::LDRSH; + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + return ARM::LDRSB; + case ARM::STR_PRE: + case ARM::STR_POST: + return ARM::STR; + case ARM::STRH_PRE: + case ARM::STRH_POST: + return ARM::STRH; + case ARM::STRB_PRE: + case ARM::STRB_POST: + return ARM::STRB; + } + return 0; +} + +MachineInstr * +ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const { + if (!EnableARM3Addr) + return NULL; + + MachineInstr *MI = MBBI; + MachineFunction &MF = *MI->getParent()->getParent(); + unsigned TSFlags = MI->getDesc().TSFlags; + bool isPre = false; + switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { + default: return NULL; + case ARMII::IndexModePre: + isPre = true; + break; + case ARMII::IndexModePost: + break; + } + + // Try splitting an indexed load/store to an un-indexed one plus an add/sub + // operation. + unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); + if (MemOpc == 0) + return NULL; + + MachineInstr *UpdateMI = NULL; + MachineInstr *MemMI = NULL; + unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + bool isLoad = !TID.mayStore(); + const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0); + const MachineOperand &Base = MI->getOperand(2); + const MachineOperand &Offset = MI->getOperand(NumOps-3); + unsigned WBReg = WB.getReg(); + unsigned BaseReg = Base.getReg(); + unsigned OffReg = Offset.getReg(); + unsigned OffImm = MI->getOperand(NumOps-2).getImm(); + ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm(); + switch (AddrMode) { + default: + assert(false && "Unknown indexed op!"); + return NULL; + case ARMII::AddrMode2: { + bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM2Offset(OffImm); + if (OffReg == 0) { + int SOImmVal = ARM_AM::getSOImmVal(Amt); + if (SOImmVal == -1) + // Can't encode it in a so_imm operand. This transformation will + // add more than 1 instruction. Abandon! + return NULL; + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? 
ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(SOImmVal) + .addImm(Pred).addReg(0).addReg(0); + } else if (Amt != 0) { + ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm); + unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt); + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg) + .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc) + .addImm(Pred).addReg(0).addReg(0); + } else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + case ARMII::AddrMode3 : { + bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub; + unsigned Amt = ARM_AM::getAM3Offset(OffImm); + if (OffReg == 0) + // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand. + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) + .addReg(BaseReg).addImm(Amt) + .addImm(Pred).addReg(0).addReg(0); + else + UpdateMI = BuildMI(MF, MI->getDebugLoc(), + get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg) + .addReg(BaseReg).addReg(OffReg) + .addImm(Pred).addReg(0).addReg(0); + break; + } + } + + std::vector<MachineInstr*> NewMIs; + if (isPre) { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(WBReg).addReg(0).addImm(0).addImm(Pred); + NewMIs.push_back(MemMI); + NewMIs.push_back(UpdateMI); + } else { + if (isLoad) + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc), MI->getOperand(0).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + else + MemMI = BuildMI(MF, MI->getDebugLoc(), + get(MemOpc)).addReg(MI->getOperand(1).getReg()) + .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred); + if (WB.isDead()) + UpdateMI->getOperand(0).setIsDead(); + NewMIs.push_back(UpdateMI); + NewMIs.push_back(MemMI); + } + + // Transfer LiveVariables states, kill / dead info. + if (LV) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + unsigned Reg = MO.getReg(); + + LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); + if (MO.isDef()) { + MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI; + if (MO.isDead()) + LV->addVirtualRegisterDead(Reg, NewMI); + } + if (MO.isUse() && MO.isKill()) { + for (unsigned j = 0; j < 2; ++j) { + // Look at the two new MI's in reverse order. + MachineInstr *NewMI = NewMIs[j]; + if (!NewMI->readsRegister(Reg)) + continue; + LV->addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. 
+ unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it. + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // ...likewise if it ends with a branch table followed by an unconditional + // branch. The branch folder can create these, and we must get rid of them for + // correctness of Thumb constant islands. + if ((SecondLastOpc == ARM::BR_JTr || SecondLastOpc==ARM::BR_JTm || + SecondLastOpc == ARM::BR_JTadd || SecondLastOpc==ARM::tBR_JTr) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. + return true; +} + + +unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) return 0; + --I; + if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc) + return 0; + + // Remove the branch. + I->eraseFromParent(); + + I = MBB.end(); + + if (I == MBB.begin()) return 1; + --I; + if (I->getOpcode() != BccOpc) + return 1; + + // Remove the branch. + I->eraseFromParent(); + return 2; +} + +unsigned +ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const { + // FIXME this should probably have a DebugLoc argument + DebugLoc dl = DebugLoc::getUnknownLoc(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + int BOpc = AFI->isThumbFunction() ? ARM::tB : ARM::B; + int BccOpc = AFI->isThumbFunction() ? ARM::tBcc : ARM::Bcc; + + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "ARM branch conditions have two components!"); + + if (FBB == 0) { + if (Cond.empty()) // Unconditional branch? 
+ BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + else + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + return 1; + } + + // Two-way conditional branch. + BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); + BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + return 2; +} + +bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (!AFI->isThumbFunction()) { + if (DestRC == ARM::GPRRegisterClass) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg))); + return true; + } + } else { + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } + } + } + if (DestRC != SrcRC) { + // Not yet supported! + return false; + } + + + if (DestRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + .addReg(SrcReg)); + else if (DestRC == ARM::DPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + .addReg(SrcReg)); + else + return false; + + return true; +} + +void ARMInstrInfo:: +storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, + bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const{ + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = 
MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR; + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::STR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FSTD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FSTS; + } + + MachineInstrBuilder MIB = + BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +void ARMInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (I != MBB.end()) DL = I->getDebugLoc(); + + if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (!AFI->isThumbFunction()); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) + .addFrameIndex(FI).addReg(0).addImm(0)); + } else if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + assert (AFI->isThumbFunction()); + BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) + .addFrameIndex(FI).addImm(0); + } else if (RC == ARM::DPRRegisterClass) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + .addFrameIndex(FI).addImm(0)); + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + .addFrameIndex(FI).addImm(0)); + } +} + +void ARMInstrInfo:: +loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const { + DebugLoc DL = DebugLoc::getUnknownLoc(); + unsigned Opc = 0; + if (RC == ARM::GPRRegisterClass) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (AFI->isThumbFunction()) { + Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + NewMIs.push_back(MIB); + return; + } + Opc = ARM::LDR; + } else if (RC == ARM::DPRRegisterClass) { + Opc = ARM::FLDD; + } else { + assert(RC == ARM::SPRRegisterClass && "Unknown regclass!"); + Opc = ARM::FLDS; + } + + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); + for (unsigned i = 0, e = Addr.size(); i != e; ++i) + MIB.addOperand(Addr[i]); + AddDefaultPred(MIB); + NewMIs.push_back(MIB); + return; +} + +bool ARMInstrInfo:: +spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + DebugLoc DL = DebugLoc::getUnknownLoc(); + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH)); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + // Add the callee-saved register as live-in. It's killed at the spill. 
+ MBB.addLiveIn(Reg); + MIB.addReg(Reg, RegState::Kill); + } + return true; +} + +bool ARMInstrInfo:: +restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + if (!AFI->isThumbFunction() || CSI.empty()) + return false; + + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc()); + MBB.insert(MI, PopMI); + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (Reg == ARM::LR) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + Reg = ARM::PC; + PopMI->setDesc(get(ARM::tPOP_RET)); + MBB.erase(MI); + } + PopMI->addOperand(MachineOperand::CreateReg(Reg, true)); + } + return true; +} + +MachineInstr *ARMInstrInfo:: +foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops, int FI) const { + if (Ops.size() != 1) return NULL; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + MachineInstr *NewMI = NULL; + switch (Opc) { + default: break; + case ARM::MOVr: { + if (MI->getOperand(4).getReg() == ARM::CPSR) + // If it is updating CPSR, then it cannot be folded. + break; + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + break; + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. 
+ break; + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0); + } + break; + } + case ARM::FCPYS: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + .addReg(SrcReg).addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS), DstReg) + .addFrameIndex(FI) + .addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + case ARM::FCPYD: { + unsigned Pred = MI->getOperand(2).getImm(); + unsigned PredReg = MI->getOperand(3).getReg(); + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + bool isKill = MI->getOperand(1).isKill(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + bool isDead = MI->getOperand(0).isDead(); + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + .addReg(DstReg, RegState::Define | getDeadRegState(isDead)) + .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); + } + break; + } + } + + return NewMI; +} + +bool ARMInstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const { + if (Ops.size() != 1) return false; + + unsigned OpNum = Ops[0]; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::MOVr: + // If it is updating CPSR, then it cannot be folded. + return MI->getOperand(4).getReg() != ARM::CPSR; + case ARM::tMOVr: + case ARM::tMOVlor2hir: + case ARM::tMOVhir2lor: + case ARM::tMOVhir2hir: { + if (OpNum == 0) { // move -> store + unsigned SrcReg = MI->getOperand(1).getReg(); + if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg)) + // tSpill cannot take a high register operand. + return false; + } else { // move -> load + unsigned DstReg = MI->getOperand(0).getReg(); + if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg)) + // tRestore cannot target a high register operand. + return false; + } + return true; + } + case ARM::FCPYS: + case ARM::FCPYD: + return true; + } + + return false; +} + +bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { + if (MBB.empty()) return false; + + switch (MBB.back().getOpcode()) { + case ARM::BX_RET: // Return. + case ARM::LDM_RET: + case ARM::tBX_RET: + case ARM::tBX_RET_vararg: + case ARM::tPOP_RET: + case ARM::B: + case ARM::tB: // Uncond branch. + case ARM::tBR_JTr: + case ARM::BR_JTr: // Jumptable branch. + case ARM::BR_JTm: // Jumptable branch through mem. + case ARM::BR_JTadd: // Jumptable branch add to pc. 
+ return true; + default: return false; + } +} + +bool ARMInstrInfo:: +ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { + ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm(); + Cond[0].setImm(ARMCC::getOppositeCondition(CC)); + return false; +} + +bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; +} + +bool ARMInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const { + unsigned Opc = MI->getOpcode(); + if (Opc == ARM::B || Opc == ARM::tB) { + MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc)); + MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm())); + MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); + return true; + } + + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx != -1) { + MachineOperand &PMO = MI->getOperand(PIdx); + PMO.setImm(Pred[0].getImm()); + MI->getOperand(PIdx+1).setReg(Pred[1].getReg()); + return true; + } + return false; +} + +bool ARMInstrInfo:: +SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const { + if (Pred1.size() > 2 || Pred2.size() > 2) + return false; + + ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm(); + ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm(); + if (CC1 == CC2) + return true; + + switch (CC1) { + default: + return false; + case ARMCC::AL: + return true; + case ARMCC::HS: + return CC2 == ARMCC::HI; + case ARMCC::LS: + return CC2 == ARMCC::LO || CC2 == ARMCC::EQ; + case ARMCC::GE: + return CC2 == ARMCC::GT; + case ARMCC::LE: + return CC2 == ARMCC::LT; + } +} + +bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.getImplicitDefs() && !TID.hasOptionalDef()) + return false; + + bool Found = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.getReg() == ARM::CPSR) { + Pred.push_back(MO); + Found = true; + } + } + + return Found; +} + + +/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) DISABLE_INLINE; +static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, + unsigned JTI) { + return JT[JTI].MBBs.size(); +} + +/// GetInstSize - Return the size of the specified MachineInstr. +/// +unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo(); + + // Basic size info comes from the TSFlags field. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned TSFlags = TID.TSFlags; + + switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { + default: { + // If this machine instr is an inline asm, measure it. 
+ if (MI->getOpcode() == ARM::INLINEASM) + return TAI->getInlineAsmLength(MI->getOperand(0).getSymbolName()); + if (MI->isLabel()) + return 0; + switch (MI->getOpcode()) { + default: + assert(0 && "Unknown or unset size field for instr!"); + break; + case TargetInstrInfo::IMPLICIT_DEF: + case TargetInstrInfo::DECLARE: + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + return 0; + } + break; + } + case ARMII::Size8Bytes: return 8; // Arm instruction x 2. + case ARMII::Size4Bytes: return 4; // Arm instruction. + case ARMII::Size2Bytes: return 2; // Thumb instruction. + case ARMII::SizeSpecial: { + switch (MI->getOpcode()) { + case ARM::CONSTPOOL_ENTRY: + // If this machine instr is a constant pool entry, its size is recorded as + // operand #2. + return MI->getOperand(2).getImm(); + case ARM::Int_eh_sjlj_setjmp: return 12; + case ARM::BR_JTr: + case ARM::BR_JTm: + case ARM::BR_JTadd: + case ARM::tBR_JTr: { + // These are jumptable branches, i.e. a branch followed by an inlined + // jumptable. The size is 4 + 4 * number of entries. + unsigned NumOps = TID.getNumOperands(); + MachineOperand JTOP = + MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2)); + unsigned JTI = JTOP.getIndex(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + assert(JTI < JT.size()); + // Thumb instructions are 2 byte aligned, but JT entries are 4 byte + // 4 aligned. The assembler / linker may add 2 byte padding just before + // the JT entries. The size does not include this padding; the + // constant islands pass does separate bookkeeping for it. + // FIXME: If we know the size of the function is less than (1 << 16) *2 + // bytes, we can use 16-bit entries instead. Then there won't be an + // alignment issue. + return getNumJTEntries(JT, JTI) * 4 + + (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4); + } + default: + // Otherwise, pseudo-instruction sizes are zero. + return 0; + } + } + } + return 0; // Not reached +} diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h new file mode 100644 index 0000000..13ff3fe --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -0,0 +1,258 @@ +//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTRUCTIONINFO_H +#define ARMINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARM.h" + +namespace llvm { + class ARMSubtarget; + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace ARMII { + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This four-bit field describes the addressing mode used. 
+ + AddrModeMask = 0xf, + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrModeT1 = 6, + AddrModeT2 = 7, + AddrModeT4 = 8, + AddrModeTs = 9, // i8 * 4 for pc and sp relative data + + // Size* - Flags to keep track of the size of an instruction. + SizeShift = 4, + SizeMask = 7 << SizeShift, + SizeSpecial = 1, // 0 byte pseudo or special case. + Size8Bytes = 2, + Size4Bytes = 3, + Size2Bytes = 4, + + // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load + // and store ops + IndexModeShift = 7, + IndexModeMask = 3 << IndexModeShift, + IndexModePre = 1, + IndexModePost = 2, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. + UnaryDP = 1 << 9, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 10, + FormMask = 0x1f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 11 << FormShift, + + // Extend instructions + ExtFrm = 12 << FormShift, + + // VFP formats + VFPUnaryFrm = 13 << FormShift, + VFPBinaryFrm = 14 << FormShift, + VFPConv1Frm = 15 << FormShift, + VFPConv2Frm = 16 << FormShift, + VFPConv3Frm = 17 << FormShift, + VFPConv4Frm = 18 << FormShift, + VFPConv5Frm = 19 << FormShift, + VFPLdStFrm = 20 << FormShift, + VFPLdStMulFrm = 21 << FormShift, + VFPMiscFrm = 22 << FormShift, + + // Thumb format + ThumbFrm = 23 << FormShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set field while generating + // machine instructions. + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; +} + +class ARMInstrInfo : public TargetInstrInfoImpl { + const ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; } + + /// Return true if the instruction is a register to register move and return + /// the source and dest operands and their sub-register indices by reference. 
+ virtual bool isMoveInstr(const MachineInstr &MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, const MachineInstr *Orig) const; + + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SrcReg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC) const; + + virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl<MachineOperand> &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl<MachineInstr*> &NewMIs) const; + virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + int FrameIndex) const; + + virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl<unsigned> &Ops, + MachineInstr* LoadMI) const { + return 0; + } + + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl<unsigned> &Ops) const; + + virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + // Predication support. + virtual bool isPredicated(const MachineInstr *MI) const; + + ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 ? 
(ARMCC::CondCodes)MI->getOperand(PIdx).getImm() + : ARMCC::AL; + } + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + /// GetInstSize - Returns the size of the specified MachineInstr. + /// + virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td new file mode 100644 index 0000000..680e772 --- /dev/null +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -0,0 +1,1390 @@ +//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM instructions in TableGen format. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +// Node definitions. 
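The SDNode records that follow give TableGen names to the target-specific opcodes that the C++ lowering code produces. As a rough illustration (a sketch, not the actual ARMISelLowering implementation; the helper name is made up), a node such as ARMISD::Wrapper is created with the ordinary SelectionDAG API and is then matched by the ARMWrapper pattern defined below:

// Sketch only: wrap a constant-pool address in an ARMISD::Wrapper node.
static SDValue lowerCPWrapperExample(ConstantPoolSDNode *CP, SelectionDAG &DAG,
                                     DebugLoc dl) {
  SDValue CPAddr = DAG.getTargetConstantPool(CP->getConstVal(), MVT::i32,
                                             CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
}
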
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, + [SDNPHasChain, SDNPOutFlag]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInFlag]>; + +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInFlag]>; +def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, + [SDNPInFlag]>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMcmpNZ : SDNode<"ARMISD::CMPNZ", SDT_ARMCmp, + [SDNPOutFlag]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>; + +def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasV5T : Predicate<"Subtarget->hasV5TOps()">; +def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">; +def HasV6 : Predicate<"Subtarget->hasV6Ops()">; +def IsThumb : Predicate<"Subtarget->isThumb()">; +def IsThumb2 : Predicate<"Subtarget->isThumb2()">; +def IsARM : Predicate<"!Subtarget->isThumb()">; + +//===----------------------------------------------------------------------===// +// ARM Flag Definitions. + +class RegConstraint<string C> { + string Constraints = C; +} + +//===----------------------------------------------------------------------===// +// ARM specific transformation functions and pattern fragments. +// + +// so_imm_XFORM - Return a so_imm value packed into the format described for +// so_imm def below. +def so_imm_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(N->getZExtValue()), + MVT::i32); +}]>; + +// so_imm_neg_XFORM - Return a so_imm value packed into the format described for +// so_imm_neg def below. +def so_imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(-(int)N->getZExtValue()), + MVT::i32); +}]>; + +// so_imm_not_XFORM - Return a so_imm value packed into the format described for +// so_imm_not def below. +def so_imm_not_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(~(int)N->getZExtValue()), + MVT::i32); +}]>; + +// rot_imm predicate - True if the 32-bit immediate is equal to 8, 16, or 24. +def rot_imm : PatLeaf<(i32 imm), [{ + int32_t v = (int32_t)N->getZExtValue(); + return v == 8 || v == 16 || v == 24; +}]>; + +/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. 
+def imm1_15 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16; +}]>; + +/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. +def imm16_31 : PatLeaf<(i32 imm), [{ + return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; +}]>; + +def so_imm_neg : + PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1; + }], so_imm_neg_XFORM>; + +def so_imm_not : + PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(~(int)N->getZExtValue()) != -1; + }], so_imm_not_XFORM>; + +// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. +def sext_16_node : PatLeaf<(i32 GPR:$a), [{ + return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; +}]>; + +class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>; + +//===----------------------------------------------------------------------===// +// Operand Definitions. +// + +// Branch target. +def brtarget : Operand<OtherVT>; + +// A list of registers separated by comma. Used by load/store multiple. +def reglist : Operand<i32> { + let PrintMethod = "printRegisterList"; +} + +// An operand for the CONSTPOOL_ENTRY pseudo-instruction. +def cpinst_operand : Operand<i32> { + let PrintMethod = "printCPInstOperand"; +} + +def jtblock_operand : Operand<i32> { + let PrintMethod = "printJTBlockOperand"; +} + +// Local PC labels. +def pclabel : Operand<i32> { + let PrintMethod = "printPCLabel"; +} + +// shifter_operand operands: so_reg and so_imm. +def so_reg : Operand<i32>, // reg reg imm + ComplexPattern<i32, 3, "SelectShifterOperandReg", + [shl,srl,sra,rotr]> { + let PrintMethod = "printSORegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} + +// so_imm - Match a 32-bit shifter_operand immediate operand, which is an +// 8-bit immediate rotated by an arbitrary number of bits. so_imm values are +// represented in the imm field in the same 12-bit form that they are encoded +// into so_imm instructions: the 8-bit immediate is the least significant bits +// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11]. +def so_imm : Operand<i32>, + PatLeaf<(imm), + [{ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1; }], + so_imm_XFORM> { + let PrintMethod = "printSOImmOperand"; +} + +// Break so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to +// get the first/second pieces. +def so_imm2part : Operand<i32>, + PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); + }]> { + let PrintMethod = "printSOImm2PartOperand"; +} + +def so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartFirst((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + +def so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getSOImmTwoPartSecond((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(ARM_AM::getSOImmVal(V), MVT::i32); +}]>; + + +// Define ARM specific addressing modes. 
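Before the addressing-mode operands below, a quick illustration of what the so_imm predicate defined above accepts. The test that ARM_AM::getSOImmVal performs is, in spirit, the following (a standalone sketch, not the actual helper):

// True iff V fits ARM's "8-bit value rotated right by an even amount" form.
static bool isSOImmEncodable(uint32_t V) {
  for (unsigned Rot = 0; Rot != 32; Rot += 2) {
    // Rotating V left by Rot undoes a rotate-right by Rot in the encoding.
    uint32_t Rotated = (V << Rot) | (Rot ? (V >> (32 - Rot)) : 0);
    if (Rotated <= 0xFF)
      return true;
  }
  return false;
}

For example 0xFF000000 (0xFF ror 8) and 0x000003FC (0xFF ror 30) pass, while 0x00FFFF00 does not; that kind of constant is what so_imm2part splits into two encodable pieces.
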
+ +// addrmode2 := reg +/- reg shop imm +// addrmode2 := reg +/- imm12 +// +def addrmode2 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode2", []> { + let PrintMethod = "printAddrMode2Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am2offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode2Offset", []> { + let PrintMethod = "printAddrMode2OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode3 := reg +/- reg +// addrmode3 := reg +/- imm8 +// +def addrmode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", []> { + let PrintMethod = "printAddrMode3Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am3offset : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode3Offset", []> { + let PrintMethod = "printAddrMode3OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode4 := reg, <mode|W> +// +def addrmode4 : Operand<i32>, + ComplexPattern<i32, 2, "", []> { + let PrintMethod = "printAddrMode4Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode5 := reg +/- imm8*4 +// +def addrmode5 : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrMode5", []> { + let PrintMethod = "printAddrMode5Operand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmodepc := pc + reg +// +def addrmodepc : Operand<i32>, + ComplexPattern<i32, 2, "SelectAddrModePC", []> { + let PrintMethod = "printAddrModePCOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// ARM Predicate operand. Default to 14 = always (AL). Second part is CC +// register whose default is 0 (no register). +def pred : PredicateOperand<OtherVT, (ops i32imm, CCR), + (ops (i32 14), (i32 zero_reg))> { + let PrintMethod = "printPredicateOperand"; +} + +// Conditional code result for instructions whose 's' bit is set, e.g. subs. +// +def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> { + let PrintMethod = "printSBitModifierOperand"; +} + +//===----------------------------------------------------------------------===// +// ARM Instruction flags. These need to match ARMInstrInfo.h. +// + +// Addressing mode. +class AddrMode<bits<4> val> { + bits<4> Value = val; +} +def AddrModeNone : AddrMode<0>; +def AddrMode1 : AddrMode<1>; +def AddrMode2 : AddrMode<2>; +def AddrMode3 : AddrMode<3>; +def AddrMode4 : AddrMode<4>; +def AddrMode5 : AddrMode<5>; +def AddrModeT1 : AddrMode<6>; +def AddrModeT2 : AddrMode<7>; +def AddrModeT4 : AddrMode<8>; +def AddrModeTs : AddrMode<9>; + +// Instruction size. +class SizeFlagVal<bits<3> val> { + bits<3> Value = val; +} +def SizeInvalid : SizeFlagVal<0>; // Unset. +def SizeSpecial : SizeFlagVal<1>; // Pseudo or special. +def Size8Bytes : SizeFlagVal<2>; +def Size4Bytes : SizeFlagVal<3>; +def Size2Bytes : SizeFlagVal<4>; + +// Load / store index mode. +class IndexMode<bits<2> val> { + bits<2> Value = val; +} +def IndexModeNone : IndexMode<0>; +def IndexModePre : IndexMode<1>; +def IndexModePost : IndexMode<2>; + +//===----------------------------------------------------------------------===// + +include "ARMInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Multiclass helpers... +// + +/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a +/// binop that produces a value. 
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, " $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} + +/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the +/// instruction modifies the CSPR register. +let Defs = [CPSR] in { +multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, "s $dst, $a, $b", + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} +} + +/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// patterns. Similar to AsI1_bin_irs except the instruction does not produce +/// a explicit result, only implicitly set CPSR. +let Defs = [CPSR] in { +multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, + opc, " $a, $b", + [(opnode GPR:$a, so_imm:$b)]>; + def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, + opc, " $a, $b", + [(opnode GPR:$a, GPR:$b)]>; + def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + opc, " $a, $b", + [(opnode GPR:$a, so_reg:$b)]>; +} +} + +/// AI_unary_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +/// FIXME: Remove the 'r' variant. Its rot_imm is zero. +multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { + def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src), + opc, " $dst, $Src", + [(set GPR:$dst, (opnode GPR:$Src))]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } + def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$Src, i32imm:$rot), + opc, " $dst, $Src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$Src, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + } +} + +/// AI_bin_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { + def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), + opc, " $dst, $LHS, $RHS", + [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, + Requires<[IsARM, HasV6]>; + def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), + opc, " $dst, $LHS, $RHS, ror $rot", + [(set GPR:$dst, (opnode GPR:$LHS, + (rotr GPR:$RHS, rot_imm:$rot)))]>, + Requires<[IsARM, HasV6]>; +} + +/// AsXI1_bin_c_irs - Same as AsI1_bin_irs but without the predicate operand and +/// setting carry bit. But it can optionally set CPSR. 
+let Uses = [CPSR] in { +multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> { + def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s), + DPFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>; + def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s), + DPFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>; + def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s), + DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"), + [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>; +} +} + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in +/// the function. The first operand is the ID# for this instruction, the second +/// is the index into the MachineConstantPool that this is, the third is the +/// size in bytes of this constant pool entry. +let isNotDuplicable = 1 in +def CONSTPOOL_ENTRY : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), + "${instid:label} ${cpidx:cpentry}", []>; + +let Defs = [SP], Uses = [SP] in { +def ADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), + "@ ADJCALLSTACKUP $amt1", + [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; + +def ADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt, pred:$p), + "@ ADJCALLSTACKDOWN $amt", + [(ARMcallseq_start timm:$amt)]>; +} + +def DWARF_LOC : +PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), + ".loc $file, $line, $col", + [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; + + +// Address computation and loads and stores in PIC mode. 
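PICADD below places a local label immediately before an add of pc. In ARM state a pc read returns the address of the current instruction plus 8, so if the source register holds a constant-pool word containing Sym minus (L + 8), where L is the address of the add, the add yields the absolute address of Sym without needing an absolute relocation. A small sketch with hypothetical values L and Sym:

// Illustration only; L and Sym stand for the add's address and the symbol.
static uint32_t picAddResult(uint32_t L, uint32_t Sym) {
  uint32_t PC   = L + 8;       // what pc reads as at the add
  uint32_t Slot = Sym - PC;    // what the PIC constant-pool entry holds
  return PC + Slot;            // == Sym
}
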
+let isNotDuplicable = 1 in { +def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), + Pseudo, "$cp:\n\tadd$p $dst, pc, $a", + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + +let AddedComplexity = 10 in { +let canFoldAsLoad = 1 in +def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr$p $dst, $addr", + [(set GPR:$dst, (load addrmodepc:$addr))]>; + +def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; + +def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; + +def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; + +def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tldr${p}sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; +} +let AddedComplexity = 10 in { +def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr$p $src, $addr", + [(store GPR:$src, addrmodepc:$addr)]>; + +def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}h $src, $addr", + [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; + +def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Pseudo, "${addr:label}:\n\tstr${p}b $src, $addr", + [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; +} +} // isNotDuplicable = 1 + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in + def BX_RET : AI<(outs), (ins), BrMiscFrm, "bx", " lr", [(ARMretflag)]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; +} + +// FIXME: remove when we have a way to marking a MI with these properties. +// FIXME: $dst1 should be a def. But the extra ops must be in the end of the +// operand list. +// FIXME: Should pc be an implicit operand like PICADD, etc? +let isReturn = 1, isTerminator = 1 in + def LDM_RET : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in { + def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMcall tglobaladdr:$func)]>; + + def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), + "bl", " ${func:call}", + [(ARMcall_pred tglobaladdr:$func)]>; + + // ARMv5T and above + def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + "blx $func", + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0011; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } + + let Uses = [LR] in { + // ARMv4T + def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), + "mov lr, pc\n\tbx $func", + [(ARMcall_nolink GPR:$func)]>; + } +} + +let isBranch = 1, isTerminator = 1 in { + // B is "predicable" since it can be xformed into a Bcc. 
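That rewrite is what ARMInstrInfo::PredicateInstruction earlier in this patch does when the if-converter hands it a predicate; condensed, the B/tB case is:

  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::B || Opc == ARM::tB) {
    MI->setDesc(get(Opc == ARM::B ? ARM::Bcc : ARM::tBcc));
    MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));        // cond
    MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false)); // CPSR
    return true;
  }
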
+ let isBarrier = 1 in { + let isPredicable = 1 in + def B : ABXI<0b1010, (outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + let isNotDuplicable = 1, isIndirectBranch = 1 in { + def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), + "mov pc, $target \n$jt", + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{20} = 0; // S Bit + let Inst{24-21} = 0b1101; + let Inst{27-26} = {0,0}; + } + def BR_JTm : JTI<(outs), + (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), + "ldr pc, $target \n$jt", + [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 1; // L bit + let Inst{21} = 0; // W bit + let Inst{22} = 0; // B bit + let Inst{24} = 1; // P bit + let Inst{27-26} = {0,1}; + } + def BR_JTadd : JTI<(outs), + (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), + "add pc, $target, $idx \n$jt", + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, + imm:$id)]> { + let Inst{20} = 0; // S bit + let Inst{24-21} = 0b0100; + let Inst{27-26} = {0,0}; + } + } // isNotDuplicable = 1, isIndirectBranch = 1 + } // isBarrier = 1 + + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), + "b", " $target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; +} + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +// Load +let canFoldAsLoad = 1 in +def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", + [(set GPR:$dst, (load addrmode2:$addr))]>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr", []>; + +// Loads with zero extension +def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr", + [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; + +def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr", + [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr", + [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr", + [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; + +let mayLoad = 1 in { +// Load doubleword +def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed loads +def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), LdFrm, + "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode2:$addr), LdFrm, + "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>; + 
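The _PRE and _POST variants here differ only in when the updated base is written back; in rough C terms (illustrative register values only):

// ldr r0, [r1, #4]!   pre-indexed:  r1 = r1 + 4;           r0 = *(uint32_t *)r1;
// ldr r0, [r1], #4    post-indexed: r0 = *(uint32_t *)r1;  r1 = r1 + 4;

The "$addr.base = $base_wb" and "$base = $base_wb" constraints tie the written-back base register to the corresponding input operand.
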
+def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am2offset:$offset), LdFrm, + "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>; + +def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), + (ins addrmode3:$addr), LdMiscFrm, + "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>; + +def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base,am3offset:$offset), LdMiscFrm, + "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; +} + +// Store +def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", " $src, $addr", + [(store GPR:$src, addrmode2:$addr)]>; + +// Stores with truncate +def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "h $src, $addr", + [(truncstorei16 GPR:$src, addrmode3:$addr)]>; + +def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, + "str", "b $src, $addr", + [(truncstorei8 GPR:$src, addrmode2:$addr)]>; + +// Store doubleword +let mayStore = 1 in +def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, + "str", "d $src, $addr", + []>, Requires<[IsARM, HasV5T]>; + +// Indexed stores +def STR_PRE : AI2stwpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, + "str", " $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STR_POST : AI2stwpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", " $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, + (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; + +def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, + (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; + +def STRH_POST: AI3sthpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, + "str", "h $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti16 GPR:$src, + GPR:$base, am3offset:$offset))]>; + +def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base, $offset]!", "$base = $base_wb", + [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +def STRB_POST: AI2stbpo<(outs GPR:$base_wb), + (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, + "str", "b $src, [$base], $offset", "$base = $base_wb", + [(set GPR:$base_wb, (post_truncsti8 GPR:$src, + GPR:$base, am2offset:$offset))]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// FIXME: $dst1 should be a def. +let mayLoad = 1 in +def LDM : AXI4ld<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$dst1, variable_ops), + LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1", + []>; + +let mayStore = 1 in +def STM : AXI4st<(outs), + (ins addrmode4:$addr, pred:$p, reglist:$src1, variable_ops), + LdStMulFrm, "stm${p}${addr:submode} $addr, $src1", + []>; + +//===----------------------------------------------------------------------===// +// Move Instructions. 
+// + +def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mov", " $dst, $src", []>, UnaryDP; +def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; + +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, + "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP; + +def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", " $dst, $src, rrx", + [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; + +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. + +let Defs = [CPSR] in { +def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, lsr #1", + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; +def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, + "mov", "s $dst, $src, asr #1", + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; +} + +//===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +defm SXTB : AI_unary_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_unary_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm SXTAB : AI_bin_rrot<0b01101010, + "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm SXTAH : AI_bin_rrot<0b01101011, + "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// TODO: SXT(A){B|H}16 + +// Zero extenders + +let AddedComplexity = 16 in { +defm UXTB : AI_unary_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_unary_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_unary_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +def : ARMV6Pat<(and (shl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 24)>; +def : ARMV6Pat<(and (srl GPR:$Src, 8), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 8)>; + +defm UXTAB : AI_bin_rrot<0b01101110, "uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>; + +// TODO: UXT(A){B|H}16 + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +defm ADD : AsI1_bin_irs<0b0100, "add", + BinOpFrag<(add node:$LHS, node:$RHS)>>; +defm SUB : AsI1_bin_irs<0b0010, "sub", + BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// ADD and SUB with 's' bit set. +defm ADDS : ASI1_bin_s_irs<0b0100, "add", + BinOpFrag<(addc node:$LHS, node:$RHS)>>; +defm SUBS : ASI1_bin_s_irs<0b0010, "sub", + BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +// FIXME: Do not allow ADC / SBC to be predicated for now. +defm ADC : AsXI1_bin_c_irs<0b0101, "adc", + BinOpFrag<(adde node:$LHS, node:$RHS)>>; +defm SBC : AsXI1_bin_c_irs<0b0110, "sbc", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; + +// These don't define reg/reg forms, because they are handled above. 
+def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]>; + +def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", " $dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>; + +// RSB with 's' bit set. +let Defs = [CPSR] in { +def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]>; +def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, + "rsb", "s $dst, $a, $b", + [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>; +} + +// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR. +let Uses = [CPSR] in { +def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s), + DPFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>; +def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s), + DPSoRegFrm, "rsc${s} $dst, $a, $b", + [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>; +} + +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +def : ARMPat<(add GPR:$src, so_imm_neg:$imm), + (SUBri GPR:$src, so_imm_neg:$imm)>; + +//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), +// (SUBSri GPR:$src, so_imm_neg:$imm)>; +//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm), +// (SBCri GPR:$src, so_imm_neg:$imm)>; + +// Note: These are implemented in C++ code, because they have to generate +// ADD/SUBrs instructions, which use a complex pattern that a xform function +// cannot produce. +// (mul X, 2^n+1) -> (add (X << n), X) +// (mul X, 2^n-1) -> (rsb X, (X << n)) + + +//===----------------------------------------------------------------------===// +// Bitwise Instructions. +// + +defm AND : AsI1_bin_irs<0b0000, "and", + BinOpFrag<(and node:$LHS, node:$RHS)>>; +defm ORR : AsI1_bin_irs<0b1100, "orr", + BinOpFrag<(or node:$LHS, node:$RHS)>>; +defm EOR : AsI1_bin_irs<0b0001, "eor", + BinOpFrag<(xor node:$LHS, node:$RHS)>>; +defm BIC : AsI1_bin_irs<0b1110, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + +def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not GPR:$src))]>, UnaryDP; +def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, + "mvn", " $dst, $src", + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in +def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, + "mvn", " $dst, $imm", + [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP; + +def : ARMPat<(and GPR:$src, so_imm_not:$imm), + (BICri GPR:$src, so_imm_not:$imm)>; + +//===----------------------------------------------------------------------===// +// Multiply Instructions. 
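The plain 32-bit multiplies come first; the AI_smul and AI_smla multiclasses further down add the v5TE halfword forms, whose patterns select on which 16-bit half of each source is used. In C terms, the cases shown below amount to (sketch only):

// smulbb rd, ra, rb : product of the signed bottom halfwords
static int32_t smulbb(int32_t a, int32_t b) { return (int16_t)a * (int16_t)b; }
// smultt rd, ra, rb : product of the signed top halfwords
static int32_t smultt(int32_t a, int32_t b) { return (a >> 16) * (b >> 16); }
// smlabb rd, ra, rb, rc : smulbb plus an accumulator
static int32_t smlabb(int32_t a, int32_t b, int32_t acc) {
  return acc + (int16_t)a * (int16_t)b;
}
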
+// + +def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "mul", " $dst, $a, $b", + [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; + +def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "mla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; + +// Extra precision multiplies with low / high results +def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smull", " $ldst, $hdst, $a, $b", []>; + +def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umull", " $ldst, $hdst, $a, $b", []>; + +// Multiply + accumulate +def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "smlal", " $ldst, $hdst, $a, $b", []>; + +def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umlal", " $ldst, $hdst, $a, $b", []>; + +def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), + (ins GPR:$a, GPR:$b), + "umaal", " $ldst, $hdst, $a, $b", []>, + Requires<[IsARM, HasV6]>; + +// Most significant word multiply +def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + "smmul", " $dst, $a, $b", + [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; + let Inst{15-12} = 0b1111; +} + +def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmla", " $dst, $a, $b, $c", + [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0001; +} + + +def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), + "smmls", " $dst, $a, $b, $c", + [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1101; +} + +multiclass AI_smul<string opc, PatFrag opnode> { + def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "bt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tb"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "tt"), " $dst, $a, $b", + [(set GPR:$dst, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wb"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), + !strconcat(opc, "wt"), " $dst, $a, $b", + [(set GPR:$dst, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } +} + + +multiclass AI_smla<string opc, PatFrag opnode> { + def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + 
!strconcat(opc, "bb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, + (opnode (sext_inreg GPR:$a, i16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } + + def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sext_inreg GPR:$b, i16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 0; + } + + def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, 16), + (sra GPR:$b, 16))))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 1; + let Inst{6} = 1; + } + + def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sext_inreg GPR:$b, i16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 0; + } + + def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), + !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, + (sra GPR:$b, 16)), 16)))]>, + Requires<[IsARM, HasV5TE]> { + let Inst{5} = 0; + let Inst{6} = 1; + } +} + +defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>; +defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; + +// TODO: Halfword multiple accumulate long: SMLAL<x><y> +// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD + +//===----------------------------------------------------------------------===// +// Misc. Arithmetic Instructions. 
+// + +def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), + "clz", " $dst, $src", + [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> { + let Inst{7-4} = 0b0001; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev", " $dst, $src", + [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b0011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), + "rev16", " $dst, $src", + [(set GPR:$dst, + (or (and (srl GPR:$src, 8), 0xFF), + (or (and (shl GPR:$src, 8), 0xFF00), + (or (and (srl GPR:$src, 8), 0xFF0000), + (and (shl GPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), + "revsh", " $dst, $src", + [(set GPR:$dst, + (sext_inreg + (or (srl (and GPR:$src, 0xFF00), 8), + (shl GPR:$src, 8)), i16))]>, + Requires<[IsARM, HasV6]> { + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; + let Inst{19-16} = 0b1111; +} + +def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhbt", " $dst, $src1, $src2, LSL $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), + (and (shl GPR:$src2, (i32 imm:$shamt)), + 0xFFFF0000)))]>, + Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b001; +} + +// Alternate cases for PKHBT where identities eliminate some nodes. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)), + (PKHBT GPR:$src1, GPR:$src2, 0)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), + (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; + + +def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), + (ins GPR:$src1, GPR:$src2, i32imm:$shamt), + "pkhtb", " $dst, $src1, $src2, ASR $shamt", + [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), + (and (sra GPR:$src2, imm16_31:$shamt), + 0xFFFF)))]>, Requires<[IsARM, HasV6]> { + let Inst{6-4} = 0b101; +} + +// Alternate cases for PKHTB where identities eliminate some nodes. Note that +// a shift amount of 0 is *not legal* here, it is PKHBT instead. +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, 16)), + (PKHTB GPR:$src1, GPR:$src2, 16)>; +def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), + (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)), + (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>; + +//===----------------------------------------------------------------------===// +// Comparison Instructions... +// + +defm CMP : AI1_cmp_irs<0b1010, "cmp", + BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; +defm CMN : AI1_cmp_irs<0b1011, "cmn", + BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>; + +// Note that TST/TEQ don't set all the same flags that CMP does! 
+defm TST : AI1_cmp_irs<0b1000, "tst",
+ BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+defm TEQ : AI1_cmp_irs<0b1001, "teq",
+ BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+
+defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
+ BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
+defm CMNnz : AI1_cmp_irs<0b1011, "cmn",
+ BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
+
+def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpNZ GPR:$src, so_imm_neg:$imm),
+ (CMNri GPR:$src, so_imm_neg:$imm)>;
+
+
+// Conditional moves
+// FIXME: should be able to write a pattern for ARMcmov, but can't use
+// a two-value operand where a dag node expects two operands. :(
+def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCs : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_reg:$true), DPSoRegFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+def MOVCCi : AI1<0b1101, (outs GPR:$dst),
+ (ins GPR:$false, so_imm:$true), DPFrm,
+ "mov", " $dst, $true",
+ [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+ RegConstraint<"$false = $dst">, UnaryDP;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+ Pseudo,
+ !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+ "${:private}PCRELL${:uid}+8))\n"),
+ !strconcat("${:private}PCRELL${:uid}:\n\t",
+ "add$p $dst, pc, #PCRELV${:uid}")),
+ []>;
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+let isCall = 1,
+ Defs = [R0, R12, LR, CPSR] in {
+ def TPsoft : ABXI<0b1011, (outs), (ins),
+ "bl __aeabi_read_tp",
+ [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is a three instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs = + [ R0, R1, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, + D0, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15 ] in { + def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), + AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, + "add r0, pc, #4\n\t" + "str r0, [$src, #+4]\n\t" + "mov r0, #0 @ eh_setjmp", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; + +// Large immediate handling. + +// Two piece so_imms. +let isReMaterializable = 1 in +def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, + "mov", " $dst, $src", + [(set GPR:$dst, so_imm2part:$src)]>; + +def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), + (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), + (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; + +// TODO: add,sub,and, 3-instr forms? + + +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>; + +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; + +// extload -> zextload +def : ARMPat<(extloadi1 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi8 addrmode2:$addr), (LDRB addrmode2:$addr)>; +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; + +def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; +def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; + +// smul* and smla* +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra (shl GPR:$b, 16), 16)), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, 16)), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16)), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, 16), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), 16), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), + (sra (shl GPR:$b, 16), 16))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, 16), 16), (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, 16))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), (sra (shl GPR:$b, 16), 16))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, 16), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, 16), 16)), 16)), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add 
GPR:$acc, + (sra (mul GPR:$a, sext_16_node:$b), 16)), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td new file mode 100644 index 0000000..ffb83a8 --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -0,0 +1,562 @@ +//===- ARMInstrThumb.td - Thumb support for ARM ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Thumb specific DAG Nodes. +// + +def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + +def imm_neg_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); +}]>; +def imm_comp_XFORM : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32); +}]>; + + +/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. +def imm0_7 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 8; +}]>; +def imm0_7_neg : PatLeaf<(i32 imm), [{ + return (uint32_t)-N->getZExtValue() < 8; +}], imm_neg_XFORM>; + +def imm0_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() < 256; +}]>; +def imm0_255_comp : PatLeaf<(i32 imm), [{ + return ~((uint32_t)N->getZExtValue()) < 256; +}]>; + +def imm8_255 : PatLeaf<(i32 imm), [{ + return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256; +}]>; +def imm8_255_neg : PatLeaf<(i32 imm), [{ + unsigned Val = -N->getZExtValue(); + return Val >= 8 && Val < 256; +}], imm_neg_XFORM>; + +// Break imm's up into two pieces: an immediate + a left shift. +// This uses thumb_immshifted to match and thumb_immshifted_val and +// thumb_immshifted_shamt to get the val/shift pieces. +def thumb_immshifted : PatLeaf<(imm), [{ + return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue()); +}]>; + +def thumb_immshifted_val : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def thumb_immshifted_shamt : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +// Define Thumb specific addressing modes. 
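As a side note before the addressing-mode definitions below: the two-piece immediate handling just defined (thumb_immshifted plus its two SDNodeXForms) rests on ARM_AM::isThumbImmShiftedVal, getThumbImmValShift and getThumbImmNonShiftedVal. A rough, illustrative C++ sketch of that split is shown here; the helper names prefixed with "sketch" are hypothetical and stand in for the real ARMAddressingModes.h routines, whose exact implementation may differ.

// Illustrative sketch only: a constant qualifies for the thumb_immshifted
// PatLeaf when it is an 8-bit value shifted left, e.g. 0xAB00 == 0xAB << 8.
// The two SDNodeXForms above then recover the 8-bit part and the shift so
// the constant can be materialized as tMOVi8 followed by tLSLri.
#include <cstdint>

static unsigned sketchValShift(uint32_t Imm) {      // stands in for getThumbImmValShift
  if ((Imm & ~255u) == 0) return 0;                 // already fits in 8 bits
  unsigned Shift = 0;
  while ((Imm & 1) == 0) { Imm >>= 1; ++Shift; }    // count trailing zeros
  return Shift;
}

static bool sketchIsThumbImmShiftedVal(uint32_t Imm) {  // stands in for isThumbImmShiftedVal
  return ((Imm >> sketchValShift(Imm)) & ~255u) == 0;   // 8-bit value once shifted down
}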
+ +// t_addrmode_rr := reg + reg +// +def t_addrmode_rr : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> { + let PrintMethod = "printThumbAddrModeRROperand"; + let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg); +} + +// t_addrmode_s4 := reg + reg +// reg + imm5 * 4 +// +def t_addrmode_s4 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS4", []> { + let PrintMethod = "printThumbAddrModeS4Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s2 := reg + reg +// reg + imm5 * 2 +// +def t_addrmode_s2 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS2", []> { + let PrintMethod = "printThumbAddrModeS2Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_s1 := reg + reg +// reg + imm5 +// +def t_addrmode_s1 : Operand<i32>, + ComplexPattern<i32, 3, "SelectThumbAddrModeS1", []> { + let PrintMethod = "printThumbAddrModeS1Operand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm, tGPR:$offsreg); +} + +// t_addrmode_sp := sp + imm8 * 4 +// +def t_addrmode_sp : Operand<i32>, + ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> { + let PrintMethod = "printThumbAddrModeSPOperand"; + let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm); +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +let Defs = [SP], Uses = [SP] in { +def tADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), + "@ tADJCALLSTACKUP $amt1", + [(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb]>; + +def tADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt), + "@ tADJCALLSTACKDOWN $amt", + [(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb]>; +} + +let isNotDuplicable = 1 in +def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp), + "$cp:\n\tadd $dst, pc", + [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. +// + +let isReturn = 1, isTerminator = 1 in { + def tBX_RET : TI<(outs), (ins), "bx lr", [(ARMretflag)]>; + // Alternative return instruction used by vararg functions. + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), "bx $target", []>; +} + +// FIXME: remove when we have a way to marking a MI with these properties. 
+let isReturn = 1, isTerminator = 1 in +def tPOP_RET : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let isCall = 1, + Defs = [R0, R1, R2, R3, LR, + D0, D1, D2, D3, D4, D5, D6, D7] in { + def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), + "bl ${func:call}", + [(ARMtcall tglobaladdr:$func)]>; + // ARMv5T and above + def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), + "blx ${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[HasV5T]>; + def tBLXr : TI<(outs), (ins tGPR:$func, variable_ops), + "blx $func", + [(ARMtcall tGPR:$func)]>, Requires<[HasV5T]>; + // ARMv4T + def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), + "cpy lr, pc\n\tbx $func", + [(ARMcall_nolink tGPR:$func)]>; +} + +let isBranch = 1, isTerminator = 1 in { + let isBarrier = 1 in { + let isPredicable = 1 in + def tB : TI<(outs), (ins brtarget:$target), "b $target", + [(br bb:$target)]>; + + // Far jump + def tBfar : TIx2<(outs), (ins brtarget:$target), "bl $target\t@ far jump",[]>; + + def tBR_JTr : TJTI<(outs), + (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), + "cpy pc, $target \n\t.align\t2\n$jt", + [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; + } +} + +// FIXME: should be able to write a pattern for ARMBrcond, but can't use +// a two-value operand where a dag node expects two operands. :( +let isBranch = 1, isTerminator = 1 in + def tBcc : TI<(outs), (ins brtarget:$target, pred:$cc), "b$cc $target", + [/*(ARMbrcond bb:$target, imm:$cc)*/]>; + +//===----------------------------------------------------------------------===// +// Load Store Instructions. +// + +let canFoldAsLoad = 1 in +def tLDR : TI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; + +def tLDRB : TI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), + "ldrb $dst, $addr", + [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; + +def tLDRH : TI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), + "ldrh $dst, $addr", + [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; + +def tLDRSB : TI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsb $dst, $addr", + [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; + +def tLDRSH : TI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), + "ldrsh $dst, $addr", + [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; + +let canFoldAsLoad = 1 in +def tLDRspi : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; + +// Special instruction for restore. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +let canFoldAsLoad = 1, mayLoad = 1 in +def tRestore : TIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), + "ldr $dst, $addr", []>; + +// Load tconstpool +let canFoldAsLoad = 1 in +def tLDRpci : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", + [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; + +// Special LDR for loads from non-pc-relative constpools. 
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +def tLDRcp : TIs<(outs tGPR:$dst), (ins i32imm:$addr), + "ldr $dst, $addr", []>; + +def tSTR : TI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_s4:$addr)]>; + +def tSTRB : TI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), + "strb $src, $addr", + [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; + +def tSTRH : TI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), + "strh $src, $addr", + [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; + +def tSTRspi : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", + [(store tGPR:$src, t_addrmode_sp:$addr)]>; + +let mayStore = 1 in { +// Special instruction for spill. It cannot clobber condition register +// when it's expanded by eliminateCallFramePseudoInstr(). +def tSpill : TIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), + "str $src, $addr", []>; +} + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +// TODO: A7-44: LDMIA - load multiple + +let mayLoad = 1 in +def tPOP : TI<(outs reglist:$dst1, variable_ops), (ins), + "pop $dst1", []>; + +let mayStore = 1 in +def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops), + "push $src1", []>; + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. +// + +// Add with carry +def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "adc $dst, $rhs", + [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; + +def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>; + + +def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; + +def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "add $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; + +def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "add $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; + +def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), + "add $dst, $rhs @ addhirr", []>; + +def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs), + "add $dst, pc, $rhs * 4", []>; + +def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), + "add $dst, $sp, $rhs * 4 @ addrspi", []>; + +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + "add $dst, $rhs * 4", []>; + +def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "and $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; + +def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "asr $dst, $lhs, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, imm:$rhs))]>; + +def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "asr $dst, $rhs", + [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; + +def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "bic $dst, $rhs", + [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; + + +def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; + +def tTST : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "tst $lhs, $rhs", + [(ARMcmpNZ (and tGPR:$lhs, 
tGPR:$rhs), 0)]>; + +def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmn $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>; + +def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>; + +def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs), + "cmp $lhs, $rhs", + [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>; + +// TODO: A7-37: CMP(3) - cmp hi regs + +def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "eor $dst, $rhs", + [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; + +def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsl $dst, $lhs, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, imm:$rhs))]>; + +def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsl $dst, $rhs", + [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; + +def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "lsr $dst, $lhs, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, imm:$rhs))]>; + +def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "lsr $dst, $rhs", + [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; + +// FIXME: This is not rematerializable because mov changes the condition code. +def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src), + "mov $dst, $src", + [(set tGPR:$dst, imm0_255:$src)]>; + +// TODO: A7-73: MOV(2) - mov setting flag. + + +// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', +// which is MOV(3). This also supports high registers. +def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src), + "cpy $dst, $src", []>; +def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2lor", []>; +def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src), + "cpy $dst, $src\t@ lor2hir", []>; +def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), + "cpy $dst, $src\t@ hir2hir", []>; + +def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "mul $dst, $rhs", + [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; + +def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src), + "mvn $dst, $src", + [(set tGPR:$dst, (not tGPR:$src))]>; + +def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src), + "neg $dst, $src", + [(set tGPR:$dst, (ineg tGPR:$src))]>; + +def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "orr $dst, $rhs", + [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; + + +def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev $dst, $src", + [(set tGPR:$dst, (bswap tGPR:$src))]>, + Requires<[IsThumb, HasV6]>; + +def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src), + "rev16 $dst, $src", + [(set tGPR:$dst, + (or (and (srl tGPR:$src, 8), 0xFF), + (or (and (shl tGPR:$src, 8), 0xFF00), + (or (and (srl tGPR:$src, 8), 0xFF0000), + (and (shl tGPR:$src, 8), 0xFF000000)))))]>, + Requires<[IsThumb, HasV6]>; + +def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "revsh $dst, $src", + [(set tGPR:$dst, + (sext_inreg + (or (srl (and tGPR:$src, 0xFFFF), 8), + (shl tGPR:$src, 8)), i16))]>, + Requires<[IsThumb, HasV6]>; + +def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "ror $dst, $rhs", + [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; + + +// Subtract with carry +def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sbc $dst, $rhs", + [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>; + + +// TODO: A7-96: STMIA - store multiple. 
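An aside on the rev16/revsh selection patterns used just above (and in the ARM-mode REV16/REVSH definitions earlier): the nested or/and/shl/srl trees match what is, written out in plain C++, a byte swap within each halfword (rev16) and a byte-swapped, sign-extended low halfword (revsh). The following is only an illustrative sketch with hypothetical helper names, not code from the backend.

#include <cstdint>

// Equivalent of the REV16/tREV16 pattern: swap the two bytes inside each
// 16-bit half of the word, e.g. 0xAABBCCDD -> 0xBBAADDCC.
static uint32_t rev16(uint32_t X) {
  return ((X >> 8) & 0x00FF00FFu) | ((X << 8) & 0xFF00FF00u);
}

// Equivalent of the REVSH/tREVSH pattern: byte-swap the low halfword, then
// sign-extend the 16-bit result to 32 bits.
static int32_t revsh(uint32_t X) {
  uint16_t Swapped = (uint16_t)(((X & 0xFF00u) >> 8) | (X << 8));
  return (int32_t)(int16_t)Swapped;
}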
+ +def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; + +def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs", + [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; + +def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + "sub $dst, $lhs, $rhs", + [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; + +def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), + "sub $dst, $rhs * 4", []>; + +def tSXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxtb $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, + Requires<[IsThumb, HasV6]>; +def tSXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "sxth $dst, $src", + [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, + Requires<[IsThumb, HasV6]>; + + +def tUXTB : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxtb $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, + Requires<[IsThumb, HasV6]>; +def tUXTH : TI<(outs tGPR:$dst), (ins tGPR:$src), + "uxth $dst, $src", + [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, + Requires<[IsThumb, HasV6]>; + + +// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. +// Expanded by the scheduler into a branch sequence. +let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. + def tMOVCCr : + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), + "@ tMOVCCr $cc", + [/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>; + +// tLEApcrel - Load a pc-relative address into a register without offending the +// assembler. +def tLEApcrel : TIx2<(outs tGPR:$dst), (ins i32imm:$label), + !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +def tLEApcrelJT : TIx2<(outs tGPR:$dst), (ins i32imm:$label, i32imm:$id), + !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(", + "${:private}PCRELL${:uid}+4))\n"), + !strconcat("\tmov $dst, #PCRELV${:uid}\n", + "${:private}PCRELL${:uid}:\n\tadd $dst, pc")), + []>; + +//===----------------------------------------------------------------------===// +// TLS Instructions +// + +// __aeabi_read_tp preserves the registers r1-r3. 
+let isCall = 1, + Defs = [R0, LR] in { + def tTPsoft : TIx2<(outs), (ins), + "bl __aeabi_read_tp", + [(set R0, ARMthread_pointer)]>; +} + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +// + +// ConstantPool, GlobalAddress +def : ThumbPat<(ARMWrapper tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>; +def : ThumbPat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; + +// JumpTable +def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (tLEApcrelJT tjumptable:$dst, imm:$id)>; + +// Direct calls +def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>; +def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>; + +// Indirect calls to ARM routines +def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>; + +// zextload i1 -> zextload i8 +def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr), + (tLDRB t_addrmode_s1:$addr)>; + +// extload -> zextload +def : ThumbPat<(extloadi1 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi8 t_addrmode_s1:$addr), (tLDRB t_addrmode_s1:$addr)>; +def : ThumbPat<(extloadi16 t_addrmode_s2:$addr), (tLDRH t_addrmode_s2:$addr)>; + +// Large immediate handling. + +// Two piece imms. +def : ThumbPat<(i32 thumb_immshifted:$src), + (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)), + (thumb_immshifted_shamt imm:$src))>; + +def : ThumbPat<(i32 imm0_255_comp:$src), + (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td new file mode 100644 index 0000000..168fb45 --- /dev/null +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -0,0 +1,12 @@ +//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Thumb2 instruction set. +// +//===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td new file mode 100644 index 0000000..0247daf --- /dev/null +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -0,0 +1,398 @@ +//===- ARMInstrVFP.td - VFP support for ARM -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM VFP instruction set. 
+// +//===----------------------------------------------------------------------===// + +def SDT_FTOI : +SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>; +def SDT_ITOF : +SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; +def SDT_CMPFP0 : +SDTypeProfile<0, 1, [SDTCisFP<0>]>; +def SDT_FMDRR : +SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; + +def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; +def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; +def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; +def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; +def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; +def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; +def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; +def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +let canFoldAsLoad = 1 in { +def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + "fldd", " $dst, $addr", + [(set DPR:$dst, (load addrmode5:$addr))]>; + +def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + "flds", " $dst, $addr", + [(set SPR:$dst, (load addrmode5:$addr))]>; +} // canFoldAsLoad + +def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + "fstd", " $src, $addr", + [(store DPR:$src, addrmode5:$addr)]>; + +def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + "fsts", " $src, $addr", + [(store SPR:$src, addrmode5:$addr)]>; + +//===----------------------------------------------------------------------===// +// Load / store multiple Instructions. +// + +let mayLoad = 1 in { +def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}d${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} + +def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dst1, + variable_ops), + "fldm${addr:submode}s${p} ${addr:base}, $dst1", + []> { + let Inst{20} = 1; +} +} + +let mayStore = 1 in { +def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}d${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} + +def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$src1, + variable_ops), + "fstm${addr:submode}s${p} ${addr:base}, $src1", + []> { + let Inst{20} = 0; +} +} // mayStore + +// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores + +//===----------------------------------------------------------------------===// +// FP Binary Operations. +// + +def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "faddd", " $dst, $a, $b", + [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; + +def FADDS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fadds", " $dst, $a, $b", + [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; + +// These are encoded as unary instructions. 
+def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + "fcmped", " $a, $b", + [(arm_cmpfp DPR:$a, DPR:$b)]>; + +def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + "fcmpes", " $a, $b", + [(arm_cmpfp SPR:$a, SPR:$b)]>; + +def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fdivd", " $dst, $a, $b", + [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; + +def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fdivs", " $dst, $a, $b", + [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; + +def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fmuld", " $dst, $a, $b", + [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; + +def FMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fmuls", " $dst, $a, $b", + [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; + +def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fnmuld", " $dst, $a, $b", + [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { + let Inst{6} = 1; +} + +def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fnmuls", " $dst, $a, $b", + [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { + let Inst{6} = 1; +} + +// Match reassociated forms only if not sign dependent rounding. +def : Pat<(fmul (fneg DPR:$a), DPR:$b), + (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; +def : Pat<(fmul (fneg SPR:$a), SPR:$b), + (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + + +def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + "fsubd", " $dst, $a, $b", + [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { + let Inst{6} = 1; +} + +def FSUBS : ASbI<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + "fsubs", " $dst, $a, $b", + [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Unary Operations. +// + +def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + "fabsd", " $dst, $a", + [(set DPR:$dst, (fabs DPR:$a))]>; + +def FABSS : ASuI<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + "fabss", " $dst, $a", + [(set SPR:$dst, (fabs SPR:$a))]>; + +def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + "fcmpezd", " $a", + [(arm_cmpfp0 DPR:$a)]>; + +def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + "fcmpezs", " $a", + [(arm_cmpfp0 SPR:$a)]>; + +def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + "fcvtds", " $dst, $a", + [(set DPR:$dst, (fextend SPR:$a))]>; + +// Special case encoding: bits 11-8 is 0b1011. 
+def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + "fcvtsd", " $dst, $a", + [(set SPR:$dst, (fround DPR:$a))]> { + let Inst{27-23} = 0b11101; + let Inst{21-16} = 0b110111; + let Inst{11-8} = 0b1011; + let Inst{7-4} = 0b1100; +} + +def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), + "fcpyd", " $dst, $a", []>; + +def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), + "fcpys", " $dst, $a", []>; + +def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), + "fnegd", " $dst, $a", + [(set DPR:$dst, (fneg DPR:$a))]>; + +def FNEGS : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + "fnegs", " $dst, $a", + [(set SPR:$dst, (fneg SPR:$a))]>; + +def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), + "fsqrtd", " $dst, $a", + [(set DPR:$dst, (fsqrt DPR:$a))]>; + +def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), + "fsqrts", " $dst, $a", + [(set SPR:$dst, (fsqrt SPR:$a))]>; + +//===----------------------------------------------------------------------===// +// FP <-> GPR Copies. Int <-> FP Conversions. +// + +def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), + "fmrs", " $dst, $src", + [(set GPR:$dst, (bitconvert SPR:$src))]>; + +def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), + "fmsr", " $dst, $src", + [(set SPR:$dst, (bitconvert GPR:$src))]>; + +def FMRRD : AVConv3I<0b11000101, 0b1011, + (outs GPR:$dst1, GPR:$dst2), (ins DPR:$src), + "fmrrd", " $dst1, $dst2, $src", + [/* FIXME: Can't write pattern for multiple result instr*/]>; + +// FMDHR: GPR -> SPR +// FMDLR: GPR -> SPR + +def FMDRR : AVConv5I<0b11000100, 0b1011, + (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), + "fmdrr", " $dst, $src1, $src2", + [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; + +// FMRDH: SPR -> GPR +// FMRDL: SPR -> GPR +// FMRRS: SPR -> GPR +// FMRX : SPR system reg -> GPR + +// FMSRR: GPR -> SPR + +// FMXR: GPR -> VFP Sstem reg + + +// Int to FP: + +def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + "fsitod", " $dst, $a", + [(set DPR:$dst, (arm_sitof SPR:$a))]> { + let Inst{7} = 1; +} + +def FSITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), + "fsitos", " $dst, $a", + [(set SPR:$dst, (arm_sitof SPR:$a))]> { + let Inst{7} = 1; +} + +def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + "fuitod", " $dst, $a", + [(set DPR:$dst, (arm_uitof SPR:$a))]>; + +def FUITOS : AVConv1I<0b11101011, 0b1000, 0b1010, (outs SPR:$dst), (ins SPR:$a), + "fuitos", " $dst, $a", + [(set SPR:$dst, (arm_uitof SPR:$a))]>; + +// FP to Int: +// Always set Z bit in the instruction, i.e. "round towards zero" variants. 
+ +def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftosizd", " $dst, $a", + [(set SPR:$dst, (arm_ftosi DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOSIZS : AVConv1I<0b11101011, 0b1101, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftosizs", " $dst, $a", + [(set SPR:$dst, (arm_ftosi SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, + (outs SPR:$dst), (ins DPR:$a), + "ftouizd", " $dst, $a", + [(set SPR:$dst, (arm_ftoui DPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +def FTOUIZS : AVConv1I<0b11101011, 0b1100, 0b1010, + (outs SPR:$dst), (ins SPR:$a), + "ftouizs", " $dst, $a", + [(set SPR:$dst, (arm_ftoui SPR:$a))]> { + let Inst{7} = 1; // Z bit +} + +//===----------------------------------------------------------------------===// +// FP FMA Operations. +// + +def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst">; + +def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmacd", " $dst, $a, $b", + [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMACS : ASbI<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmacs", " $dst, $a, $b", + [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + "fnmscd", " $dst, $a, $b", + [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + "fnmscs", " $dst, $a, $b", + [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, + RegConstraint<"$dstin = $dst"> { + let Inst{6} = 1; +} + +//===----------------------------------------------------------------------===// +// FP Conditional moves. 
+//
+
+def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fcpyd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fcpys", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+ (outs DPR:$dst), (ins DPR:$false, DPR:$true),
+ "fnegd", " $dst, $true",
+ [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+ (outs SPR:$dst), (ins SPR:$false, SPR:$true),
+ "fnegs", " $dst, $true",
+ [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
+ RegConstraint<"$false = $dst">;
+
+
+//===----------------------------------------------------------------------===//
+// Misc.
+//
+
+let Defs = [CPSR] in
+def FMSTAT : AI<(outs), (ins), VFPMiscFrm, "fmstat", "", [(arm_fmstat)]> {
+ let Inst{27-20} = 0b11101111;
+ let Inst{19-16} = 0b0001;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = 0b1010;
+ let Inst{7} = 0;
+ let Inst{4} = 1;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
new file mode 100644
index 0000000..e551c41
--- /dev/null
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -0,0 +1,298 @@
+//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the ARM target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARMJITInfo.h"
+#include "ARMInstrInfo.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Config/alloca.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Streams.h"
+#include "llvm/System/Memory.h"
+#include <cstdlib>
+using namespace llvm;
+
+void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ abort();
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because the prolog/epilog inserted by GCC won't work for us (we need
+// to preserve more context and manipulate the stack directly). Instead,
+// write our own wrapper, which does things our way, so we have complete
+// control over register saving and restoring.
+extern "C" {
+#if defined(__arm__)
+ void ARMCompilationCallback(void);
+ asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl " ASMPREFIX "ARMCompilationCallback\n"
+ ASMPREFIX "ARMCompilationCallback:\n"
+ // Save caller-saved registers since they may contain stuff
+ // for the real target function right now.
We have to act as if this + // whole compilation callback doesn't exist as far as the caller is + // concerned, so we can't just preserve the callee saved regs. + "stmdb sp!, {r0, r1, r2, r3, lr}\n" +#ifndef __SOFTFP__ + "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // The LR contains the address of the stub function on entry. + // pass it as the argument to the C part of the callback + "mov r0, lr\n" + "sub sp, sp, #4\n" + // Call the C portion of the callback + "bl " ASMPREFIX "ARMCompilationCallbackC\n" + "add sp, sp, #4\n" + // Restoring the LR to the return address of the function that invoked + // the stub and de-allocating the stack space for it requires us to + // swap the two saved LR values on the stack, as they're backwards + // for what we need since the pop instruction has a pre-determined + // order for the registers. + // +--------+ + // 0 | LR | Original return address + // +--------+ + // 1 | LR | Stub address (start of stub) + // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) + // 6-20 | D0..D7 | Saved VFP registers + // +--------+ + // +#ifndef __SOFTFP__ + // Restore VFP caller-saved registers. + "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" +#endif + // + // We need to exchange the values in slots 0 and 1 so we can + // return to the address in slot 1 with the address in slot 0 + // restored to the LR. + "ldr r0, [sp,#20]\n" + "ldr r1, [sp,#16]\n" + "str r1, [sp,#20]\n" + "str r0, [sp,#16]\n" + // Return to the (newly modified) stub to invoke the real function. + // The above twiddling of the saved return addresses allows us to + // deallocate everything, including the LR the stub saved, all in one + // pop instruction. + "ldmia sp!, {r0, r1, r2, r3, lr, pc}\n" + ); +#else // Not an ARM host + void ARMCompilationCallback() { + assert(0 && "Cannot call ARMCompilationCallback() on a non-ARM arch!\n"); + abort(); + } +#endif +} + +/// ARMCompilationCallbackC - This is the target-specific function invoked +/// by the function stub when we did not know the real target of a call. +/// This function must locate the start of the stub or call site and pass +/// it into the JIT compiler function. +extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { + // Get the address of the compiled code for this function. + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); + + // Rewrite the call target... so that we don't end up here every time we + // execute the call. We're replacing the first two instructions of the + // stub with: + // ldr pc, [pc,#-4] + // <addr> + if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub writable\n"; + abort(); + } + *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] + *(intptr_t *)(StubAddr+4) = NewVal; + if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { + cerr << "ERROR: Unable to mark stub executable\n"; + abort(); + } +} + +TargetJITInfo::LazyResolverFn +ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return ARMCompilationCallback; +} + +void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, + JITCodeEmitter &JCE) { + JCE.startGVStub(GV, 4, 4); + JCE.emitWordLE((intptr_t)Ptr); + void *PtrAddr = JCE.finishGVStub(GV); + addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); + return PtrAddr; +} + +void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) { + // If this is just a call to an external function, emit a branch instead of a + // call. 
The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
+ // Branch to the corresponding function addr.
+ if (IsPIC) {
+ // The stub is 16-byte size and 4-aligned.
+ intptr_t LazyPtr = getIndirectSymAddr(Fn);
+ if (!LazyPtr) {
+ // In PIC mode, the function stub is loading a lazy-ptr.
+ LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
+ if (F)
+ DOUT << "JIT: Indirect symbol emitted at [" << LazyPtr << "] for GV '"
+ << F->getName() << "'\n";
+ else
+ DOUT << "JIT: Stub emitted at [" << LazyPtr
+ << "] for external function at '" << Fn << "'\n";
+ }
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
+ JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
+ JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
+ JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8)
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ } else {
+ // The stub is 8-byte size and 4-aligned.
+ JCE.startGVStub(F, 8, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ JCE.emitWordLE((intptr_t)Fn); // addr of function
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 8);
+ }
+ } else {
+ // The compilation callback will overwrite the first two words of this
+ // stub with indirect branch instructions targeting the compiled code.
+ // This stub sets the return address to restart the stub, so that
+ // the new branch will be invoked when we come back.
+ //
+ // Branch and link to the compilation callback.
+ // The stub is 16-byte size and 4-byte aligned.
+ JCE.startGVStub(F, 16, 4);
+ intptr_t Addr = (intptr_t)JCE.getCurrentPCValue();
+ // Save LR so the callback can determine which stub called it.
+ // The compilation callback is responsible for popping this prior
+ // to returning.
+ JCE.emitWordLE(0xe92d4000); // push {lr}
+ // Set the return address to go back to the start of this stub.
+ JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
+ // Invoke the compilation callback.
+ JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
+ // The address of the compilation callback.
+ JCE.emitWordLE((intptr_t)ARMCompilationCallback);
+ sys::Memory::InvalidateInstructionCache((void*)Addr, 16);
+ }
+
+ return JCE.finishGVStub(F);
+}
+
+intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
+ ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
+ switch (RT) {
+ default:
+ return (intptr_t)(MR->getResultPointer());
+ case ARM::reloc_arm_pic_jt:
+ // Destination address - jump table base.
+ return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
+ case ARM::reloc_arm_jt_base:
+ // Jump table base address.
+ return getJumpTableBaseAddr(MR->getJumpTableIndex());
+ case ARM::reloc_arm_cp_entry:
+ case ARM::reloc_arm_vfp_cp_entry:
+ // Constant pool entry address.
+ return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); + case ARM::reloc_arm_machine_cp_entry: { + ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); + assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && + "Can't handle this machine constant pool entry yet!"); + intptr_t Addr = (intptr_t)(MR->getResultPointer()); + Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); + return Addr; + } + } +} + +/// relocate - Before the JIT can run a block of code that has been emitted, +/// it must rewrite the code to contain the actual addresses of any +/// referenced global symbols. +void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = resolveRelocDestAddr(MR); + switch ((ARM::RelocationType)MR->getRelocationType()) { + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_relative: { + // It is necessary to calculate the correct PC relative value. We + // subtract the base addr from the target addr to form a byte offset. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + // If the result is positive, set bit U(23) to 1. + if (ResultPtr >= 0) + *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; + else { + // Otherwise, obtain the absolute value and set bit U(23) to 0. + *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); + ResultPtr = - ResultPtr; + } + // Set the immed value calculated. + // VFP immediate offset is multiplied by 4. + if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) + ResultPtr = ResultPtr >> 2; + *((intptr_t*)RelocPos) |= ResultPtr; + // Set register Rn to PC. + *((intptr_t*)RelocPos) |= + ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift; + break; + } + case ARM::reloc_arm_pic_jt: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_absolute: { + // These addresses have already been resolved. + *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; + break; + } + case ARM::reloc_arm_branch: { + // It is necessary to calculate the correct value of signed_immed_24 + // field. We subtract the base addr from the target addr to form a + // byte offset, which must be inside the range -33554432 and +33554428. + // Then, we set the signed_immed_24 field of the instruction to bits + // [25:2] of the byte offset. More details ARM-ARM p. A4-11. + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; + assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + case ARM::reloc_arm_jt_base: { + // JT base - (instruction addr + 8) + ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; + *((intptr_t*)RelocPos) |= ResultPtr; + break; + } + } + } +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h new file mode 100644 index 0000000..7dfeed8 --- /dev/null +++ b/lib/Target/ARM/ARMJITInfo.h @@ -0,0 +1,178 @@ +//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMJITInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMJITINFO_H +#define ARMJITINFO_H + +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + class ARMTargetMachine; + + class ARMJITInfo : public TargetJITInfo { + // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding + // CONSTPOOL_ENTRY addresses. + SmallVector<intptr_t, 16> ConstPoolId2AddrMap; + + // JumpTableId2AddrMap - A map from inline jumptable ids to the + // corresponding inline jump table bases. + SmallVector<intptr_t, 16> JumpTableId2AddrMap; + + // PCLabelMap - A map from PC labels to addresses. + DenseMap<unsigned, intptr_t> PCLabelMap; + + // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) + // addresses to their indirect symbol addresses. + DenseMap<void*, intptr_t> Sym2IndirectSymMap; + + // IsPIC - True if the relocation model is PIC. This is used to determine + // how to codegen function stubs. + bool IsPIC; + + public: + explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } + + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + virtual void replaceMachineCodeForFunction(void *Old, void *New); + + /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object + /// to emit an indirect symbol which contains the address of the specified + /// ptr. + virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, + JITCodeEmitter &JCE); + + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a + /// small native function that simply calls the function at the specified + /// address. + virtual void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE); + + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + virtual bool hasCustomConstantPool() const { return true; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + virtual bool hasCustomJumpTables() const { return true; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. + virtual bool allocateSeparateGVMemory() const { +#ifdef __APPLE__ + return true; +#else + return false; +#endif + } + + /// Initialize - Initialize internal stage for the function being JITted. + /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize + /// jump table ids to jump table bases map; remember if codegen relocation + /// model is PIC. 
+ void Initialize(const MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + ConstPoolId2AddrMap.resize(AFI->getNumConstPoolEntries()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; + } + + /// getConstantPoolEntryAddr - The ARM target puts all constant + /// pool entries into constant islands. This returns the address of the + /// constant pool entry of the specified index. + intptr_t getConstantPoolEntryAddr(unsigned CPI) const { + assert(CPI < ConstPoolId2AddrMap.size()); + return ConstPoolId2AddrMap[CPI]; + } + + /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address + /// where its associated value is stored. When relocations are processed, + /// this value will be used to resolve references to the constant. + void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { + assert(CPI < ConstPoolId2AddrMap.size()); + ConstPoolId2AddrMap[CPI] = Addr; + } + + /// getJumpTableBaseAddr - The ARM target inline all jump tables within + /// text section of the function. This returns the address of the base of + /// the jump table of the specified index. + intptr_t getJumpTableBaseAddr(unsigned JTI) const { + assert(JTI < JumpTableId2AddrMap.size()); + return JumpTableId2AddrMap[JTI]; + } + + /// addJumpTableBaseAddr - Map a jump table index to the address where + /// the corresponding inline jump table is emitted. When relocations are + /// processed, this value will be used to resolve references to the + /// jump table. + void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { + assert(JTI < JumpTableId2AddrMap.size()); + JumpTableId2AddrMap[JTI] = Addr; + } + + /// getPCLabelAddr - Retrieve the address of the PC label of the specified id. + intptr_t getPCLabelAddr(unsigned Id) const { + DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); + assert(I != PCLabelMap.end()); + return I->second; + } + + /// addPCLabelAddr - Remember the address of the specified PC label. + void addPCLabelAddr(unsigned Id, intptr_t Addr) { + PCLabelMap.insert(std::make_pair(Id, Addr)); + } + + /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the + /// specified symbol located at address. Returns 0 if the indirect symbol + /// has not been emitted. + intptr_t getIndirectSymAddr(void *Addr) const { + DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr); + if (I != Sym2IndirectSymMap.end()) + return I->second; + return 0; + } + + /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to + /// its indirect symbol address. + void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { + Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); + } + + private: + /// resolveRelocDestAddr - Resolve the resulting address of the relocation + /// if it's not already solved. Constantpool entries must be resolved by + /// ARM target. + intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; + }; +} + +#endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp new file mode 100644 index 0000000..047552f --- /dev/null +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -0,0 +1,778 @@ +//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
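The accessors above implement a simple record-then-resolve protocol: the code emitter records the address of each constant-pool entry, jump-table base, and PC label as it lays them out, and resolveRelocDestAddr() reads those addresses back when relocations are applied. A minimal sketch of that bookkeeping for the constant-pool case, in standard C++ with std::vector standing in for the SmallVector used above:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative only: an index -> emitted-address map, sized up front and
// filled in as each CONSTPOOL_ENTRY is emitted, then queried at reloc time.
struct ConstPoolAddrMap {
  std::vector<intptr_t> Addrs;
  void init(unsigned NumEntries) { Addrs.assign(NumEntries, 0); }
  void record(unsigned CPI, intptr_t Addr) {
    assert(CPI < Addrs.size());
    Addrs[CPI] = Addr;
  }
  intptr_t lookup(unsigned CPI) const {
    assert(CPI < Addrs.size());
    return Addrs[CPI];
  }
};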
+// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs load / store related peephole +// optimizations. This pass should be run after register allocation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-ldst-opt" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +STATISTIC(NumLDMGened , "Number of ldm instructions generated"); +STATISTIC(NumSTMGened , "Number of stm instructions generated"); +STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); +STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); + +namespace { + struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { + static char ID; + ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + ARMFunctionInfo *AFI; + RegScavenger *RS; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM load / store optimization pass"; + } + + private: + struct MemOpQueueEntry { + int Offset; + unsigned Position; + MachineBasicBlock::iterator MBBI; + bool Merged; + MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) + : Offset(o), Position(p), MBBI(i), Merged(false) {}; + }; + typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; + typedef MemOpQueue::iterator MemOpQueueIter; + + SmallVector<MachineBasicBlock::iterator, 4> + MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, + int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps); + + void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); + bool MergeReturnIntoLDM(MachineBasicBlock &MBB); + }; + char ARMLoadStoreOpt::ID = 0; +} + +/// createARMLoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. +FunctionPass *llvm::createARMLoadStoreOptimizationPass() { + return new ARMLoadStoreOpt(); +} + +static int getLoadStoreMultipleOpcode(int Opcode) { + switch (Opcode) { + case ARM::LDR: + NumLDMGened++; + return ARM::LDM; + case ARM::STR: + NumSTMGened++; + return ARM::STM; + case ARM::FLDS: + NumFLDMGened++; + return ARM::FLDMS; + case ARM::FSTS: + NumFSTMGened++; + return ARM::FSTMS; + case ARM::FLDD: + NumFLDMGened++; + return ARM::FLDMD; + case ARM::FSTD: + NumFSTMGened++; + return ARM::FSTMD; + default: abort(); + } + return 0; +} + +/// mergeOps - Create and insert a LDM or STM with Base as base register and +/// registers in Regs as the register operands that would be loaded / stored. +/// It returns true if the transformation is done. 
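A typical case, with illustrative registers and a zero starting offset (so the ia sub-mode is chosen):

    ldr r0, [r4]
    ldr r1, [r4, #4]
    ldr r2, [r4, #8]
    ldr r3, [r4, #12]
  =>
    ldmia r4, {r0, r1, r2, r3}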
+static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + int Offset, unsigned Base, bool BaseKill, int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, + SmallVector<std::pair<unsigned, bool>, 8> &Regs, + const TargetInstrInfo *TII) { + // FIXME would it be better to take a DL from one of the loads arbitrarily? + DebugLoc dl = DebugLoc::getUnknownLoc(); + // Only a single register to load / store. Don't bother. + unsigned NumRegs = Regs.size(); + if (NumRegs <= 1) + return false; + + ARM_AM::AMSubMode Mode = ARM_AM::ia; + bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + if (isAM4 && Offset == 4) + Mode = ARM_AM::ib; + else if (isAM4 && Offset == -4 * (int)NumRegs + 4) + Mode = ARM_AM::da; + else if (isAM4 && Offset == -4 * (int)NumRegs) + Mode = ARM_AM::db; + else if (Offset != 0) { + // If starting offset isn't zero, insert a MI to materialize a new base. + // But only do so if it is cost effective, i.e. merging more than two + // loads / stores. + if (NumRegs <= 2) + return false; + + unsigned NewBase; + if (Opcode == ARM::LDR) + // If it is a load, then just use one of the destination register to + // use as the new base. + NewBase = Regs[NumRegs-1].first; + else { + // Use the scratch register to use as a new base. + NewBase = Scratch; + if (NewBase == 0) + return false; + } + int BaseOpc = ARM::ADDri; + if (Offset < 0) { + BaseOpc = ARM::SUBri; + Offset = - Offset; + } + int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset == -1) + return false; // Probably not worth it then. + + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(ImmedOffset) + .addImm(Pred).addReg(PredReg).addReg(0); + Base = NewBase; + BaseKill = true; // New base is always killed right its use. + } + + bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; + bool isDef = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + Opcode = getLoadStoreMultipleOpcode(Opcode); + MachineInstrBuilder MIB = (isAM4) + ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) + : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) + MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) + | getKillRegState(Regs[i].second)); + + return true; +} + +/// MergeLDR_STR - Merge a number of load / store instructions into one or more +/// load / store multiple instructions. 
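For AM4 (word) transfers the candidates must have ascending register numbers as the offsets increase, so a run can be cut short. An illustrative case:

    ldr r2, [r4]
    ldr r3, [r4, #4]
    ldr r1, [r4, #8]
  =>
    ldmia r4, {r2, r3}
    ldr   r1, [r4, #8]

The first two loads merge; r1 breaks the ascending-register requirement, so the routine emits what it has and recurses on the remaining ops, which here stay as a single unmerged load.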
+SmallVector<MachineBasicBlock::iterator, 4> +ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps) { + SmallVector<MachineBasicBlock::iterator, 4> Merges; + bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; + int Offset = MemOps[SIndex].Offset; + int SOffset = Offset; + unsigned Pos = MemOps[SIndex].Position; + MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; + unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg(); + unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg); + bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill(); + + SmallVector<std::pair<unsigned,bool>, 8> Regs; + Regs.push_back(std::make_pair(PReg, isKill)); + for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { + int NewOffset = MemOps[i].Offset; + unsigned Reg = MemOps[i].MBBI->getOperand(0).getReg(); + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + isKill = MemOps[i].MBBI->getOperand(0).isKill(); + // AM4 - register numbers in ascending order. + // AM5 - consecutive register numbers in ascending order. + if (NewOffset == Offset + (int)Size && + ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) { + Offset += Size; + Regs.push_back(std::make_pair(Reg, isKill)); + PRegNum = RegNum; + } else { + // Can't merge this in. Try merge the earlier ones first. + if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg, + Scratch, Regs, TII)) { + Merges.push_back(prior(Loc)); + for (unsigned j = SIndex; j < i; ++j) { + MBB.erase(MemOps[j].MBBI); + MemOps[j].Merged = true; + } + } + SmallVector<MachineBasicBlock::iterator, 4> Merges2 = + MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps); + Merges.append(Merges2.begin(), Merges2.end()); + return Merges; + } + + if (MemOps[i].Position > Pos) { + Pos = MemOps[i].Position; + Loc = MemOps[i].MBBI; + } + } + + bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg, + Scratch, Regs, TII)) { + Merges.push_back(prior(Loc)); + for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) { + MBB.erase(MemOps[i].MBBI); + MemOps[i].Merged = true; + } + } + + return Merges; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. 
+static ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg) { + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx == -1) { + PredReg = 0; + return ARMCC::AL; + } + + PredReg = MI->getOperand(PIdx+1).getReg(); + return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); +} + +static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, ARMCC::CondCodes Pred, + unsigned PredReg) { + unsigned MyPredReg = 0; + return (MI && MI->getOpcode() == ARM::SUBri && + MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg); +} + +static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, + unsigned Bytes, ARMCC::CondCodes Pred, + unsigned PredReg) { + unsigned MyPredReg = 0; + return (MI && MI->getOpcode() == ARM::ADDri && + MI->getOperand(0).getReg() == Base && + MI->getOperand(1).getReg() == Base && + ARM_AM::getAM2Offset(MI->getOperand(2).getImm()) == Bytes && + getInstrPredicate(MI, MyPredReg) == Pred && + MyPredReg == PredReg); +} + +static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return 0; + case ARM::LDR: + case ARM::STR: + case ARM::FLDS: + case ARM::FSTS: + return 4; + case ARM::FLDD: + case ARM::FSTD: + return 8; + case ARM::LDM: + case ARM::STM: + return (MI->getNumOperands() - 4) * 4; + case ARM::FLDMS: + case ARM::FSTMS: + case ARM::FLDMD: + case ARM::FSTMD: + return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; + } +} + +/// mergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base +/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: +/// +/// stmia rn, <ra, rb, rc> +/// rn := rn + 4 * 3; +/// => +/// stmia rn!, <ra, rb, rc> +/// +/// rn := rn - 4 * 3; +/// ldmia rn, <ra, rb, rc> +/// => +/// ldmdb rn!, <ra, rb, rc> +static bool mergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool &Advance, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = MBBI; + unsigned Base = MI->getOperand(0).getReg(); + unsigned Bytes = getLSMultipleTransferSize(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + int Opcode = MI->getOpcode(); + bool isAM4 = Opcode == ARM::LDM || Opcode == ARM::STM; + + if (isAM4) { + if (ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())) + return false; + + // Can't use the updating AM4 sub-mode if the base register is also a dest + // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined. 
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).getReg() == Base) + return false; + } + + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::db, true)); + MBB.erase(PrevMBBI); + return true; + } else if (Mode == ARM_AM::ib && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(ARM_AM::da, true)); + MBB.erase(PrevMBBI); + return true; + } + } + + if (MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && + isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + return true; + } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && + isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + return true; + } + } + } else { + // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. + if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) + return false; + + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); + unsigned Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (Mode == ARM_AM::ia && + isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::db, true, Offset)); + MBB.erase(PrevMBBI); + return true; + } + } + + if (MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (Mode == ARM_AM::ia && + isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset)); + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + } + return true; + } + } + + return false; +} + +static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { + switch (Opc) { + case ARM::LDR: return ARM::LDR_PRE; + case ARM::STR: return ARM::STR_PRE; + case ARM::FLDS: return ARM::FLDMS; + case ARM::FLDD: return ARM::FLDMD; + case ARM::FSTS: return ARM::FSTMS; + case ARM::FSTD: return ARM::FSTMD; + default: abort(); + } + return 0; +} + +static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { + switch (Opc) { + case ARM::LDR: return ARM::LDR_POST; + case ARM::STR: return ARM::STR_POST; + case ARM::FLDS: return ARM::FLDMS; + case ARM::FLDD: return ARM::FLDMD; + case ARM::FSTS: return ARM::FSTMS; + case ARM::FSTD: return ARM::FSTMD; + default: abort(); + } + return 0; +} + +/// mergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base +/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible: +static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const TargetInstrInfo *TII, + bool &Advance, + MachineBasicBlock::iterator &I) { + MachineInstr *MI = MBBI; + unsigned Base = MI->getOperand(1).getReg(); + bool BaseKill = MI->getOperand(1).isKill(); + unsigned Bytes = getLSMultipleTransferSize(MI); + int Opcode = MI->getOpcode(); + DebugLoc dl = MI->getDebugLoc(); + bool isAM2 = Opcode == 
ARM::LDR || Opcode == ARM::STR; + if ((isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) || + (!isAM2 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)) + return false; + + bool isLd = Opcode == ARM::LDR || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + // Can't do the merge if the destination register is the same as the would-be + // writeback register. + if (isLd && MI->getOperand(0).getReg() == Base) + return false; + + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + bool DoMerge = false; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + unsigned NewOpc = 0; + if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + if (isMatchingDecrement(PrevMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } else if (isAM2 && isMatchingIncrement(PrevMBBI, Base, Bytes, + Pred, PredReg)) { + DoMerge = true; + NewOpc = getPreIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) + MBB.erase(PrevMBBI); + } + + if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator NextMBBI = next(MBBI); + if (isAM2 && isMatchingDecrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + AddSub = ARM_AM::sub; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Pred, PredReg)) { + DoMerge = true; + NewOpc = getPostIndexedLoadStoreOpcode(Opcode); + } + if (DoMerge) { + if (NextMBBI == I) { + Advance = true; + ++I; + } + MBB.erase(NextMBBI); + } + } + + if (!DoMerge) + return false; + + bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + unsigned Offset = isAM2 ? ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift) + : ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) ? ARM_AM::db : ARM_AM::ia, + true, isDPR ? 2 : 1); + if (isLd) { + if (isAM2) + // LDR_PRE, LDR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg()) + .addReg(Base, RegState::Define) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FLDMS, FLDMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Offset).addImm(Pred).addReg(PredReg) + .addReg(MI->getOperand(0).getReg(), RegState::Define); + } else { + MachineOperand &MO = MI->getOperand(0); + if (isAM2) + // STR_PRE, STR_POST; + BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) + .addReg(MO.getReg(), getKillRegState(BaseKill)) + .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + else + // FSTMS, FSTMD + BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) + .addImm(Pred).addReg(PredReg) + .addReg(MO.getReg(), getKillRegState(MO.isKill())); + } + MBB.erase(MBBI); + + return true; +} + +/// isMemoryOp - Returns true if instruction is a memory operations (that this +/// pass is capable of operating on). +static bool isMemoryOp(MachineInstr *MI) { + int Opcode = MI->getOpcode(); + switch (Opcode) { + default: break; + case ARM::LDR: + case ARM::STR: + return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; + case ARM::FLDS: + case ARM::FSTS: + return MI->getOperand(1).isReg(); + case ARM::FLDD: + case ARM::FSTD: + return MI->getOperand(1).isReg(); + } + return false; +} + +/// AdvanceRS - Advance register scavenger to just before the earliest memory +/// op that is being merged. 
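mergeBaseUpdateLoadStore() above is the single-instruction analogue of the LDM/STM base-update folding: a neighbouring add or sub of the base register becomes the writeback of a pre- or post-indexed load/store. Two illustrative cases, with registers and offsets chosen for exposition:

    sub r3, r3, #4
    str r2, [r3]
  =>
    str r2, [r3, #-4]!

    ldr r0, [r5]
    add r5, r5, #4
  =>
    ldr r0, [r5], #4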
+void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { + MachineBasicBlock::iterator Loc = MemOps[0].MBBI; + unsigned Position = MemOps[0].Position; + for (unsigned i = 1, e = MemOps.size(); i != e; ++i) { + if (MemOps[i].Position < Position) { + Position = MemOps[i].Position; + Loc = MemOps[i].MBBI; + } + } + + if (Loc != MBB.begin()) + RS->forward(prior(Loc)); +} + +/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR +/// ops of the same base and incrementing offset into LDM / STM ops. +bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { + unsigned NumMerges = 0; + unsigned NumMemOps = 0; + MemOpQueue MemOps; + unsigned CurrBase = 0; + int CurrOpc = -1; + unsigned CurrSize = 0; + ARMCC::CondCodes CurrPred = ARMCC::AL; + unsigned CurrPredReg = 0; + unsigned Position = 0; + + RS->enterBasicBlock(&MBB); + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + bool Advance = false; + bool TryMerge = false; + bool Clobber = false; + + bool isMemOp = isMemoryOp(MBBI); + if (isMemOp) { + int Opcode = MBBI->getOpcode(); + bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + unsigned Size = getLSMultipleTransferSize(MBBI); + unsigned Base = MBBI->getOperand(1).getReg(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); + unsigned NumOperands = MBBI->getDesc().getNumOperands(); + unsigned OffField = MBBI->getOperand(NumOperands-3).getImm(); + int Offset = isAM2 + ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + // Watch out for: + // r4 := ldr [r5] + // r5 := ldr [r5, #4] + // r6 := ldr [r5, #8] + // + // The second ldr has effectively broken the chain even though it + // looks like the later ldr(s) use the same base register. Try to + // merge the ldr's so far, including this one. But don't try to + // combine the following ldr(s). + Clobber = (Opcode == ARM::LDR && Base == MBBI->getOperand(0).getReg()); + if (CurrBase == 0 && !Clobber) { + // Start of a new chain. + CurrBase = Base; + CurrOpc = Opcode; + CurrSize = Size; + CurrPred = Pred; + CurrPredReg = PredReg; + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + if (Clobber) { + TryMerge = true; + Advance = true; + } + + if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) { + // No need to match PredReg. + // Continue adding to the queue. + if (Offset > MemOps.back().Offset) { + MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + } else { + for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); + I != E; ++I) { + if (Offset < I->Offset) { + MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); + NumMemOps++; + Advance = true; + break; + } else if (Offset == I->Offset) { + // Collision! This can't be merged! + break; + } + } + } + } + } + } + + if (Advance) { + ++Position; + ++MBBI; + } else + TryMerge = true; + + if (TryMerge) { + if (NumMemOps > 1) { + // Try to find a free register to use as a new base in case it's needed. + // First advance to the instruction just before the start of the chain. + AdvanceRS(MBB, MemOps); + // Find a scratch register. Make sure it's a call clobbered register or + // a spilled callee-saved register. 
+ unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, true); + if (!Scratch) + Scratch = RS->FindUnusedReg(&ARM::GPRRegClass, + AFI->getSpilledCSRegisters()); + // Process the load / store instructions. + RS->forward(prior(MBBI)); + + // Merge ops. + SmallVector<MachineBasicBlock::iterator,4> MBBII = + MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, + CurrPred, CurrPredReg, Scratch, MemOps); + + // Try folding preceeding/trailing base inc/dec into the generated + // LDM/STM ops. + for (unsigned i = 0, e = MBBII.size(); i < e; ++i) + if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI)) + NumMerges++; + NumMerges += MBBII.size(); + + // Try folding preceeding/trailing base inc/dec into those load/store + // that were not merged to form LDM/STM ops. + for (unsigned i = 0; i != NumMemOps; ++i) + if (!MemOps[i].Merged) + if (mergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI)) + NumMerges++; + + // RS may be pointing to an instruction that's deleted. + RS->skipTo(prior(MBBI)); + } + + CurrBase = 0; + CurrOpc = -1; + CurrSize = 0; + CurrPred = ARMCC::AL; + CurrPredReg = 0; + if (NumMemOps) { + MemOps.clear(); + NumMemOps = 0; + } + + // If iterator hasn't been advanced and this is not a memory op, skip it. + // It can't start a new chain anyway. + if (!Advance && !isMemOp && MBBI != E) { + ++Position; + ++MBBI; + } + } + } + return NumMerges > 0; +} + +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op +/// (bx lr) into the preceeding stack restore so it directly restore the value +/// of LR into pc. +/// ldmfd sp!, {r7, lr} +/// bx lr +/// => +/// ldmfd sp!, {r7, pc} +bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + if (MBB.empty()) return false; + + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + if (MBBI->getOpcode() == ARM::BX_RET && MBBI != MBB.begin()) { + MachineInstr *PrevMI = prior(MBBI); + if (PrevMI->getOpcode() == ARM::LDM) { + MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1); + if (MO.getReg() == ARM::LR) { + PrevMI->setDesc(TII->get(ARM::LDM_RET)); + MO.setReg(ARM::PC); + MBB.erase(MBBI); + return true; + } + } + } + return false; +} + +bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + const TargetMachine &TM = Fn.getTarget(); + AFI = Fn.getInfo<ARMFunctionInfo>(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + RS = new RegScavenger(); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= LoadStoreMultipleOpti(MBB); + Modified |= MergeReturnIntoLDM(MBB); + } + + delete RS; + return Modified; +} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h new file mode 100644 index 0000000..6662be1 --- /dev/null +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -0,0 +1,238 @@ +//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares ARM-specific per-machine-function information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMMACHINEFUNCTIONINFO_H +#define ARMMACHINEFUNCTIONINFO_H + +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/BitVector.h" + +namespace llvm { + +/// ARMFunctionInfo - This class is derived from MachineFunction private +/// ARM target-specific information for each MachineFunction. +class ARMFunctionInfo : public MachineFunctionInfo { + + /// isThumb - True if this function is compiled under Thumb mode. + /// Used to initialized Align, so must precede it. + bool isThumb; + + /// Align - required alignment. ARM functions and Thumb functions with + /// constant pools require 4-byte alignment; other Thumb functions + /// require only 2-byte alignment. + unsigned Align; + + /// VarArgsRegSaveSize - Size of the register save area for vararg functions. + /// + unsigned VarArgsRegSaveSize; + + /// HasStackFrame - True if this function has a stack frame. Set by + /// processFunctionBeforeCalleeSavedScan(). + bool HasStackFrame; + + /// LRSpilledForFarJump - True if the LR register has been for spilled to + /// enable far jump. + bool LRSpilledForFarJump; + + /// R3IsLiveIn - True if R3 is live in to this function. + /// FIXME: Remove when register scavenger for Thumb is done. + bool R3IsLiveIn; + + /// FramePtrSpillOffset - If HasStackFrame, this records the frame pointer + /// spill stack offset. + unsigned FramePtrSpillOffset; + + /// GPRCS1Offset, GPRCS2Offset, DPRCSOffset - Starting offset of callee saved + /// register spills areas. For Mac OS X: + /// + /// GPR callee-saved (1) : r4, r5, r6, r7, lr + /// -------------------------------------------- + /// GPR callee-saved (2) : r8, r10, r11 + /// -------------------------------------------- + /// DPR callee-saved : d8 - d15 + unsigned GPRCS1Offset; + unsigned GPRCS2Offset; + unsigned DPRCSOffset; + + /// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills + /// areas. + unsigned GPRCS1Size; + unsigned GPRCS2Size; + unsigned DPRCSSize; + + /// GPRCS1Frames, GPRCS2Frames, DPRCSFrames - Keeps track of frame indices + /// which belong to these spill areas. + BitVector GPRCS1Frames; + BitVector GPRCS2Frames; + BitVector DPRCSFrames; + + /// SpilledCSRegs - A BitVector mask of all spilled callee-saved registers. + /// + BitVector SpilledCSRegs; + + /// JumpTableUId - Unique id for jumptables. + /// + unsigned JumpTableUId; + + unsigned ConstPoolEntryUId; + +public: + ARMFunctionInfo() : + isThumb(false), + Align(2U), + VarArgsRegSaveSize(0), HasStackFrame(false), + LRSpilledForFarJump(false), R3IsLiveIn(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), + JumpTableUId(0), ConstPoolEntryUId(0) {} + + ARMFunctionInfo(MachineFunction &MF) : + isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + Align(isThumb ? 
1U : 2U), + VarArgsRegSaveSize(0), HasStackFrame(false), + LRSpilledForFarJump(false), R3IsLiveIn(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), + SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), + JumpTableUId(0), ConstPoolEntryUId(0) {} + + bool isThumbFunction() const { return isThumb; } + + unsigned getAlign() const { return Align; } + void setAlign(unsigned a) { Align = a; } + + unsigned getVarArgsRegSaveSize() const { return VarArgsRegSaveSize; } + void setVarArgsRegSaveSize(unsigned s) { VarArgsRegSaveSize = s; } + + bool hasStackFrame() const { return HasStackFrame; } + void setHasStackFrame(bool s) { HasStackFrame = s; } + + bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; } + void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; } + + // FIXME: Remove when register scavenger for Thumb is done. + bool isR3LiveIn() const { return R3IsLiveIn; } + void setR3IsLiveIn(bool l) { R3IsLiveIn = l; } + + unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; } + void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; } + + unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; } + unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; } + unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; } + + void setGPRCalleeSavedArea1Offset(unsigned o) { GPRCS1Offset = o; } + void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; } + void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; } + + unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } + unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + + void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } + void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + + bool isGPRCalleeSavedArea1Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS1Frames.size()) + return false; + return GPRCS1Frames[fi]; + } + bool isGPRCalleeSavedArea2Frame(int fi) const { + if (fi < 0 || fi >= (int)GPRCS2Frames.size()) + return false; + return GPRCS2Frames[fi]; + } + bool isDPRCalleeSavedAreaFrame(int fi) const { + if (fi < 0 || fi >= (int)DPRCSFrames.size()) + return false; + return DPRCSFrames[fi]; + } + + void addGPRCalleeSavedArea1Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS1Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS1Frames.resize(Size); + } + GPRCS1Frames[fi] = true; + } + } + void addGPRCalleeSavedArea2Frame(int fi) { + if (fi >= 0) { + int Size = GPRCS2Frames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + GPRCS2Frames.resize(Size); + } + GPRCS2Frames[fi] = true; + } + } + void addDPRCalleeSavedAreaFrame(int fi) { + if (fi >= 0) { + int Size = DPRCSFrames.size(); + if (fi >= Size) { + Size *= 2; + if (fi >= Size) + Size = fi+1; + DPRCSFrames.resize(Size); + } + DPRCSFrames[fi] = true; + } + } + + void setCSRegisterIsSpilled(unsigned Reg) { + SpilledCSRegs.set(Reg); + } + + bool isCSRegisterSpilled(unsigned Reg) const { + return SpilledCSRegs[Reg]; + } + + const BitVector &getSpilledCSRegisters() const { + return SpilledCSRegs; + } + + unsigned createJumpTableUId() { + return JumpTableUId++; + } + + unsigned getNumJumpTables() const { + return 
JumpTableUId; + } + + void initConstPoolEntryUId(unsigned UId) { + ConstPoolEntryUId = UId; + } + + unsigned getNumConstPoolEntries() const { + return ConstPoolEntryUId; + } + + unsigned createConstPoolEntryUId() { + return ConstPoolEntryUId++; + } +}; +} // End llvm namespace + +#endif // ARMMACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp new file mode 100644 index 0000000..199858f --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -0,0 +1,1528 @@ +//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include <algorithm> +using namespace llvm; + +static cl::opt<bool> ThumbRegScavenging("enable-thumb-reg-scavenging", + cl::Hidden, + cl::desc("Enable register scavenging on Thumb")); + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace ARM; + switch (RegEnum) { + case R0: case S0: case D0: return 0; + case R1: case S1: case D1: return 1; + case R2: case S2: case D2: return 2; + case R3: case S3: case D3: return 3; + case R4: case S4: case D4: return 4; + case R5: case S5: case D5: return 5; + case R6: case S6: case D6: return 6; + case R7: case S7: case D7: return 7; + case R8: case S8: case D8: return 8; + case R9: case S9: case D9: return 9; + case R10: case S10: case D10: return 10; + case R11: case S11: case D11: return 11; + case R12: case S12: case D12: return 12; + case SP: case S13: case D13: return 13; + case LR: case S14: case D14: return 14; + case PC: case S15: case D15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + default: + assert(0 && "Unknown ARM register!"); + abort(); + } +} + +unsigned ARMRegisterInfo::getRegisterNumbering(unsigned RegEnum, + bool &isSPVFP) { + isSPVFP = false; + + using namespace ARM; + switch (RegEnum) { + default: + assert(0 && "Unknown ARM register!"); + abort(); + case R0: case D0: return 0; + case R1: case D1: return 1; + case R2: case D2: return 2; + case R3: case D3: return 3; + case 
R4: case D4: return 4; + case R5: case D5: return 5; + case R6: case D6: return 6; + case R7: case D7: return 7; + case R8: case D8: return 8; + case R9: case D9: return 9; + case R10: case D10: return 10; + case R11: case D11: return 11; + case R12: case D12: return 12; + case SP: case D13: return 13; + case LR: case D14: return 14; + case PC: case D15: return 15; + + case S0: case S1: case S2: case S3: + case S4: case S5: case S6: case S7: + case S8: case S9: case S10: case S11: + case S12: case S13: case S14: case S15: + case S16: case S17: case S18: case S19: + case S20: case S21: case S22: case S23: + case S24: case S25: case S26: case S27: + case S28: case S29: case S30: case S31: { + isSPVFP = true; + switch (RegEnum) { + default: return 0; // Avoid compile time warning. + case S0: return 0; + case S1: return 1; + case S2: return 2; + case S3: return 3; + case S4: return 4; + case S5: return 5; + case S6: return 6; + case S7: return 7; + case S8: return 8; + case S9: return 9; + case S10: return 10; + case S11: return 11; + case S12: return 12; + case S13: return 13; + case S14: return 14; + case S15: return 15; + case S16: return 16; + case S17: return 17; + case S18: return 18; + case S19: return 19; + case S20: return 20; + case S21: return 21; + case S22: return 22; + case S23: return 23; + case S24: return 24; + case S25: return 25; + case S26: return 26; + case S27: return 27; + case S28: return 28; + case S29: return 29; + case S30: return 30; + case S31: return 31; + } + } + } +} + +ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii, + const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), + TII(tii), STI(sti), + FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) { +} + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. +void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + unsigned Pred, unsigned PredReg, + const TargetInstrInfo *TII, + bool isThumb, + DebugLoc dl) const { + MachineFunction &MF = *MBB.getParent(); + MachineConstantPool *ConstantPool = MF.getConstantPool(); + Constant *C = ConstantInt::get(Type::Int32Ty, Val); + unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); + if (isThumb) + BuildMI(MBB, MBBI, dl, + TII->get(ARM::tLDRcp),DestReg).addConstantPoolIndex(Idx); + else + BuildMI(MBB, MBBI, dl, TII->get(ARM::LDRcp), DestReg) + .addConstantPoolIndex(Idx) + .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); +} + +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// isLowRegister - Returns true if the register is low register r0-r7. 
+/// +bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +const TargetRegisterClass* +ARMRegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, MVT VT) const { + if (STI.isThumb()) { + if (isLowRegister(Reg)) + return ARM::tGPRRegisterClass; + switch (Reg) { + default: + break; + case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: + return ARM::GPRRegisterClass; + } + } + return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); +} + +const unsigned* +ARMRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const unsigned CalleeSavedRegs[] = { + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + + static const unsigned DarwinCalleeSavedRegs[] = { + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R9, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +} + +const TargetRegisterClass* const * +ARMRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { + static const TargetRegisterClass * const CalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, + &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, + &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, + + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, + 0 + }; + return STI.isThumb() ? ThumbCalleeSavedRegClasses : CalleeSavedRegClasses; +} + +BitVector ARMRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + // FIXME: avoid re-calculating this everytime. + BitVector Reserved(getNumRegs()); + Reserved.set(ARM::SP); + Reserved.set(ARM::PC); + if (STI.isTargetDarwin() || hasFP(MF)) + Reserved.set(FramePtr); + // Some targets reserve R9. + if (STI.isR9Reserved()) + Reserved.set(ARM::R9); + return Reserved; +} + +bool +ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { + switch (Reg) { + default: break; + case ARM::SP: + case ARM::PC: + return true; + case ARM::R7: + case ARM::R11: + if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF))) + return true; + break; + case ARM::R9: + return STI.isR9Reserved(); + } + + return false; +} + +bool +ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + return ThumbRegScavenging || !AFI->isThumbFunction(); +} + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. 
+/// +bool ARMRegisterInfo::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return NoFramePointerElim || MFI->hasVarSizedObjects(); +} + +// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +// not required, we reserve argument space for call sites in the function +// immediately on entry to the current function. This eliminates the need for +// add/sub sp brackets around call sites. Returns true if the call frame is +// included as part of the stack frame. +bool ARMRegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (AFI->isThumbFunction()) { + if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 + return false; + } else { + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + } + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// emitARMRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in ARM code. +static +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo &TII, + DebugLoc dl) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + // Get the properly encoded SOImmVal field. + int SOImmVal = ARM_AM::getSOImmVal(ThisVal); + assert(SOImmVal != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + BuildMI(MBB, MBBI, dl, TII.get(isSub ? ARM::SUBri : ARM::ADDri), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(SOImmVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + BaseReg = DestReg; + } +} + +/// calcNumMI - Returns the number of instructions required to materialize +/// the specific add / sub r, c instruction. +static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, + unsigned NumBits, unsigned Scale) { + unsigned NumMIs = 0; + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + + if (Opc == ARM::tADDrSPi) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + NumMIs++; + NumBits = 8; + Scale = 1; // Followed by a number of tADDi8. + Chunk = ((1 << NumBits) - 1) * Scale; + } + + NumMIs += Bytes / Chunk; + if ((Bytes % Chunk) != 0) + NumMIs++; + if (ExtraOpc) + NumMIs++; + return NumMIs; +} + +/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. Materialize the immediate +/// in a register using mov / mvn sequences or load the immediate from a +/// constpool entry. 
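emitARMRegPlusImmediate() above peels an arbitrary byte offset into pieces that each fit an ARM data-processing immediate (an 8-bit value rotated right by an even amount), emitting one ADDri or SUBri per piece; the Thumb helpers that follow do the analogous thing with small unrotated immediates. A rough standalone sketch of that chunking, counting how many add/sub instructions an offset needs; it uses a simplified even-rotation mask rather than LLVM's getSOImmValRotate/rotr32 helpers, so treat it as illustrative:

#include <cstdint>

// Illustrative: peel one 8-bit chunk, aligned to an even bit position, per
// iteration, the way the materialization loop above strips one encodable
// immediate at a time.
static unsigned numAddSubChunks(uint32_t Bytes) {
  unsigned Count = 0;
  while (Bytes) {
    unsigned TZ = 0;
    while (!(Bytes & (1u << TZ))) ++TZ;   // position of the lowest set bit
    TZ &= ~1u;                            // rotations come in even amounts
    Bytes &= ~(0xFFu << TZ);              // remove one 8-bit chunk
    ++Count;
  }
  return Count;
}

For example, an offset of 0x404 needs two instructions (#4 and #0x400), since its significant bits span nine positions, more than a single rotated 8-bit immediate can cover.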
+static +void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, bool CanChangeCC, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isHigh = !MRI.isLowRegister(DestReg) || + (BaseReg != 0 && !MRI.isLowRegister(BaseReg)); + bool isSub = false; + // Subtract doesn't have high register version. Load the negative value + // if either base or dest register is a high register. Also, if do not + // issue sub as part of the sequence if condition register is to be + // preserved. + if (NumBytes < 0 && !isHigh && CanChangeCC) { + isSub = true; + NumBytes = -NumBytes; + } + unsigned LdReg = DestReg; + if (DestReg == ARM::SP) { + assert(BaseReg == ARM::SP && "Unexpected!"); + LdReg = ARM::R3; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::R12) + .addReg(ARM::R3, RegState::Kill); + } + + if (NumBytes <= 255 && NumBytes >= 0) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + else if (NumBytes < 0 && NumBytes >= -255) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg).addImm(NumBytes); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), LdReg) + .addReg(LdReg, RegState::Kill); + } else + MRI.emitLoadConstPool(MBB, MBBI, LdReg, NumBytes, ARMCC::AL, 0, &TII, + true, dl); + + // Emit add / sub. + int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + const MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, + TII.get(Opc), DestReg); + if (DestReg == ARM::SP || isSub) + MIB.addReg(BaseReg).addReg(LdReg, RegState::Kill); + else + MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); + if (DestReg == ARM::SP) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVhir2lor), ARM::R3) + .addReg(ARM::R12, RegState::Kill); +} + +/// emitThumbRegPlusImmediate - Emits a series of instructions to materialize +/// a destreg = basereg + immediate in Thumb code. +static +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = NumBytes < 0; + unsigned Bytes = (unsigned)NumBytes; + if (isSub) Bytes = -NumBytes; + bool isMul4 = (Bytes & 3) == 0; + bool isTwoAddr = false; + bool DstNotEqBase = false; + unsigned NumBits = 1; + unsigned Scale = 1; + int Opc = 0; + int ExtraOpc = 0; + + if (DestReg == BaseReg && BaseReg == ARM::SP) { + assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); + NumBits = 7; + Scale = 4; + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + isTwoAddr = true; + } else if (!isSub && BaseReg == ARM::SP) { + // r1 = add sp, 403 + // => + // r1 = add sp, 100 * 4 + // r1 = add r1, 3 + if (!isMul4) { + Bytes &= ~3; + ExtraOpc = ARM::tADDi3; + } + NumBits = 8; + Scale = 4; + Opc = ARM::tADDrSPi; + } else { + // sp = sub sp, c + // r1 = sub sp, c + // r8 = sub sp, c + if (DestReg != BaseReg) + DstNotEqBase = true; + NumBits = 8; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + + unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; + if (NumMIs > Threshold) { + // This will expand into too many instructions. Load the immediate from a + // constpool entry. 
+ emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, + MRI, dl); + return; + } + + if (DstNotEqBase) { + if (MRI.isLowRegister(DestReg) && MRI.isLowRegister(BaseReg)) { + // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) + unsigned Chunk = (1 << 3) - 1; + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + BuildMI(MBB, MBBI, dl,TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal); + } else { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) + .addReg(BaseReg, RegState::Kill); + } + BaseReg = DestReg; + } + + unsigned Chunk = ((1 << NumBits) - 1) * Scale; + while (Bytes) { + unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; + Bytes -= ThisVal; + ThisVal /= Scale; + // Build the new tADD / tSUB. + if (isTwoAddr) + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(DestReg).addImm(ThisVal); + else { + bool isKill = BaseReg != ARM::SP; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); + BaseReg = DestReg; + + if (Opc == ARM::tADDrSPi) { + // r4 = add sp, imm + // r4 = add r4, imm + // ... + NumBits = 8; + Scale = 1; + Chunk = ((1 << NumBits) - 1) * Scale; + Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + isTwoAddr = true; + } + } + } + + if (ExtraOpc) + BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg) + .addReg(DestReg, RegState::Kill) + .addImm(((unsigned)NumBytes) & 3); +} + +static +void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, + bool isThumb, const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + if (isThumb) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, + MRI, dl); + else + emitARMRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, + Pred, PredReg, TII, dl); +} + +void ARMRegisterInfo:: +eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + if (!hasReservedCallFrame(MF)) { + // If we have alloca, convert as follows: + // ADJCALLSTACKDOWN -> sub, sp, sp, amount + // ADJCALLSTACKUP -> add, sp, sp, amount + MachineInstr *Old = I; + DebugLoc dl = Old->getDebugLoc(); + unsigned Amount = Old->getOperand(0).getImm(); + if (Amount != 0) { + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + // We need to keep the stack aligned properly. To do this, we round the + // amount of space needed for the outgoing arguments up to the next + // alignment boundary. + unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); + Amount = (Amount+Align-1)/Align*Align; + + // Replace the pseudo instruction with a new instruction... + unsigned Opc = Old->getOpcode(); + bool isThumb = AFI->isThumbFunction(); + ARMCC::CondCodes Pred = isThumb + ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(1).getImm(); + if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { + // Note: PredReg is operand 2 for ADJCALLSTACKDOWN. + unsigned PredReg = isThumb ? 0 : Old->getOperand(2).getReg(); + emitSPUpdate(MBB, I, -Amount, Pred, PredReg, isThumb, TII, *this, dl); + } else { + // Note: PredReg is operand 3 for ADJCALLSTACKUP. + unsigned PredReg = isThumb ? 
0 : Old->getOperand(3).getReg(); + assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); + emitSPUpdate(MBB, I, Amount, Pred, PredReg, isThumb, TII, *this, dl); + } + } + } + MBB.erase(I); +} + +/// emitThumbConstant - Emit a series of instructions to materialize a +/// constant. +static void emitThumbConstant(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Imm, + const TargetInstrInfo &TII, + const ARMRegisterInfo& MRI, + DebugLoc dl) { + bool isSub = Imm < 0; + if (isSub) Imm = -Imm; + + int Chunk = (1 << 8) - 1; + int ThisVal = (Imm > Chunk) ? Chunk : Imm; + Imm -= ThisVal; + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), DestReg).addImm(ThisVal); + if (Imm > 0) + emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + if (isSub) + BuildMI(MBB, MBBI, dl, TII.get(ARM::tNEG), DestReg) + .addReg(DestReg, RegState::Kill); +} + +/// findScratchRegister - Find a 'free' ARM register. If register scavenger +/// is not being used, R12 is available. Otherwise, try for a call-clobbered +/// register first and then a spilled callee-saved register if that fails. +static +unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, + ARMFunctionInfo *AFI) { + unsigned Reg = RS ? RS->FindUnusedReg(RC, true) : (unsigned) ARM::R12; + assert (!AFI->isThumbFunction()); + if (Reg == 0) + // Try a already spilled CS register. + Reg = RS->FindUnusedReg(RC, AFI->getSpilledCSRegisters()); + + return Reg; +} + +void ARMRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS) const{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + DebugLoc dl = MI.getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + unsigned FrameReg = ARM::SP; + int FrameIndex = MI.getOperand(i).getIndex(); + int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + + MF.getFrameInfo()->getStackSize() + SPAdj; + + if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (hasFP(MF)) { + assert(SPAdj == 0 && "Unexpected"); + // There is alloca()'s in this function, must reference off the frame + // pointer instead. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrMode2; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(i+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::MOVr)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::SUBri)); + } + + // Common case: small offset, fits into instruction. 
+ int ImmedOffset = ARM_AM::getSOImmVal(Offset); + if (ImmedOffset != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(ImmedOffset); + return; + } + + // Otherwise, we fallback to common code below to form the imm offset with + // a sequence of ADDri instructions. First though, pull as much of the imm + // into this ADDri as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + int ThisSOImmVal = ARM_AM::getSOImmVal(ThisImmVal); + assert(ThisSOImmVal != -1 && "Bit extraction didn't work?"); + MI.getOperand(i+1).ChangeToImmediate(ThisSOImmVal); + } else if (Opcode == ARM::tADDrSPi) { + Offset += MI.getOperand(i+1).getImm(); + + // Can't use tADDrSPi if it's based off the frame pointer. + unsigned NumBits = 0; + unsigned Scale = 1; + if (FrameReg != ARM::SP) { + Opcode = ARM::tADDi3; + MI.setDesc(TII.get(ARM::tADDi3)); + NumBits = 3; + } else { + NumBits = 8; + Scale = 4; + assert((Offset & 3) == 0 && + "Thumb add/sub sp, #imm immediate must be multiple of 4!"); + } + + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::tMOVhir2lor)); + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(i+1); + return; + } + + // Common case: small offset, fits into instruction. + unsigned Mask = (1 << NumBits) - 1; + if (((Offset / Scale) & ~Mask) == 0) { + // Replace the FrameIndex with sp / fp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Offset / Scale); + return; + } + + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned Bytes = (Offset > 0) ? Offset : -Offset; + unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); + // MI would expand into a large number of instructions. Don't try to + // simplify the immediate. 
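+    // (NumMIs, computed by calcNumMI above, estimates how many add / sub
+    // instructions the full expansion would take.)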
+ if (NumMIs > 2) { + emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, + *this, dl); + MBB.erase(II); + return; + } + + if (Offset > 0) { + // Translate r0 = add sp, imm to + // r0 = add sp, 255*4 + // r0 = add r0, (imm - 255*4) + MI.getOperand(i).ChangeToRegister(FrameReg, false); + MI.getOperand(i+1).ChangeToImmediate(Mask); + Offset = (Offset - Mask * Scale); + MachineBasicBlock::iterator NII = next(II); + emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, + *this, dl); + } else { + // Translate r0 = add sp, -imm to + // r0 = -imm (this is then translated into a series of instructons) + // r0 = add r0, sp + emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); + MI.setDesc(TII.get(ARM::tADDhirr)); + MI.getOperand(i).ChangeToRegister(DestReg, false, false, true); + MI.getOperand(i+1).ChangeToRegister(FrameReg, false); + } + return; + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode2: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = i+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode5: { + ImmIdx = i+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + case ARMII::AddrModeTs: { + ImmIdx = i+1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = (FrameReg == ARM::SP) ? 8 : 5; + Scale = 4; + break; + } + default: + assert(0 && "Unsupported addressing mode!"); + abort(); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0 && !isThumb) { + Offset = -Offset; + isSub = true; + } + + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(i).ChangeToRegister(FrameReg, false); + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + return; + } + + bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill; + if (AddrMode == ARMII::AddrModeTs) { + // Thumb tLDRspi, tSTRspi. These will change to instructions that use + // a different base register. + NumBits = 5; + Mask = (1 << NumBits) - 1; + } + // If this is a thumb spill / restore, we will be using a constpool load to + // materialize the offset. + if (AddrMode == ARMII::AddrModeTs && isThumSpillRestore) + ImmOp.ChangeToImmediate(0); + else { + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) + ImmedOffset |= 1 << NumBits; + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above, handle the rest, providing a register that is + // SP+LargeImm. 
+  assert(Offset && "This code isn't needed if offset already handled!");
+
+  if (isThumb) {
+    if (Desc.mayLoad()) {
+      // Use the destination register to materialize sp + offset.
+      unsigned TmpReg = MI.getOperand(0).getReg();
+      bool UseRR = false;
+      if (Opcode == ARM::tRestore) {
+        if (FrameReg == ARM::SP)
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this, dl);
+        else {
+          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+                            true, dl);
+          UseRR = true;
+        }
+      } else
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                  *this, dl);
+      MI.setDesc(TII.get(ARM::tLDR));
+      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+      if (UseRR)
+        // Use [reg, reg] addrmode.
+        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+      else // tLDR has an extra register operand.
+        MI.addOperand(MachineOperand::CreateReg(0, false));
+    } else if (Desc.mayStore()) {
+      // FIXME! This is horrific!!! We need register scavenging.
+      // Our temporary workaround has marked r3 unavailable. Of course, r3 is
+      // also an ABI register, so it's possible that it is the register being
+      // stored here. If that's the case, we do the following:
+      //   r12 = r2
+      //   Use r2 to materialize sp + offset
+      //   str r3, r2
+      //   r2 = r12
+      unsigned ValReg = MI.getOperand(0).getReg();
+      unsigned TmpReg = ARM::R3;
+      bool UseRR = false;
+      if (ValReg == ARM::R3) {
+        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+          .addReg(ARM::R2, RegState::Kill);
+        TmpReg = ARM::R2;
+      }
+      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+        BuildMI(MBB, II, dl, TII.get(ARM::tMOVlor2hir), ARM::R12)
+          .addReg(ARM::R3, RegState::Kill);
+      if (Opcode == ARM::tSpill) {
+        if (FrameReg == ARM::SP)
+          emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
+                                   Offset, false, TII, *this, dl);
+        else {
+          emitLoadConstPool(MBB, II, TmpReg, Offset, ARMCC::AL, 0, &TII,
+                            true, dl);
+          UseRR = true;
+        }
+      } else
+        emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
+                                  *this, dl);
+      MI.setDesc(TII.get(ARM::tSTR));
+      MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
+      if (UseRR) // Use [reg, reg] addrmode.
+        MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
+      else // tSTR has an extra register operand.
+        MI.addOperand(MachineOperand::CreateReg(0, false));
+
+      MachineBasicBlock::iterator NII = next(II);
+      if (ValReg == ARM::R3)
+        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R2)
+          .addReg(ARM::R12, RegState::Kill);
+      if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
+        BuildMI(MBB, NII, dl, TII.get(ARM::tMOVhir2lor), ARM::R3)
+          .addReg(ARM::R12, RegState::Kill);
+    } else
+      assert(false && "Unexpected opcode!");
+  } else {
+    // Insert a set of r12 with the full address: r12 = sp + offset
+    // If the offset we have is too large to fit into the instruction, we need
+    // to form it with a series of ADDri's. Do this by taking 8-bit chunks
+    // out of 'Offset'.
+    unsigned ScratchReg = findScratchRegister(RS, &ARM::GPRRegClass, AFI);
+    if (ScratchReg == 0)
+      // No register is "free". Scavenge a register.
+      ScratchReg = RS->scavengeRegister(&ARM::GPRRegClass, II, SPAdj);
+    int PIdx = MI.findFirstPredOperandIdx();
+    ARMCC::CondCodes Pred = (PIdx == -1)
+      ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+    unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+    emitARMRegPlusImmediate(MBB, II, ScratchReg, FrameReg,
+                            isSub ?
-Offset : Offset, Pred, PredReg, TII, dl); + MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + } +} + +static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + } + return (unsigned)Offset; +} + +void +ARMRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // This tells PEI to spill the FP as if it is any other callee-save register + // to take advantage the eliminateFrameIndex machinery. This also ensures it + // is spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool CanEliminateFrame = true; + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector<unsigned, 4> UnspilledCS1GPRs; + SmallVector<unsigned, 4> UnspilledCS2GPRs; + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = getCalleeSavedRegs(); + const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.getRegInfo().isPhysRegUsed(Reg)) { + AFI->setCSRegisterIsSpilled(Reg); + Spilled = true; + CanEliminateFrame = false; + } else { + // Check alias registers too. + for (const unsigned *Aliases = getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { + Spilled = true; + CanEliminateFrame = false; + } + } + } + + if (CSRegClasses[i] == &ARM::GPRRegClass) { + if (Spilled) { + NumGPRSpills++; + + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) + LRSpilled = true; + CS1Spilled = true; + continue; + } + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + } + + bool ForceLRSpill = false; + if (!LRSpilled && AFI->isThumbFunction()) { + unsigned FnSize = TII.GetFunctionSizeInBytes(MF); + // Force LR to be spilled if the Thumb function size is > 2048. This enables + // use of BL to implement far jump. If it turns out that it's not needed + // then the branch fix up path will undo it. + if (FnSize >= (1 << 11)) { + CanEliminateFrame = false; + ForceLRSpill = true; + } + } + + bool ExtraCSSpill = false; + if (!CanEliminateFrame || hasFP(MF)) { + AFI->setHasStackFrame(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 
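+    // (The restore LDM can then pop directly into PC instead of popping into
+    // LR and issuing a separate return instruction.)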
+    if (!LRSpilled && CS1Spilled) {
+      MF.getRegInfo().setPhysRegUsed(ARM::LR);
+      AFI->setCSRegisterIsSpilled(ARM::LR);
+      NumGPRSpills++;
+      UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(),
+                                       UnspilledCS1GPRs.end(),
+                                       (unsigned)ARM::LR));
+      ForceLRSpill = false;
+      ExtraCSSpill = true;
+    }
+
+    // Darwin ABI requires FP to point to the stack slot that contains the
+    // previous FP.
+    if (STI.isTargetDarwin() || hasFP(MF)) {
+      MF.getRegInfo().setPhysRegUsed(FramePtr);
+      NumGPRSpills++;
+    }
+
+    // If the stack and doubles are 8-byte aligned and we are spilling an odd
+    // number of GPRs, spill one extra callee-save GPR so we won't have to pad
+    // between the integer and double callee-save areas.
+    unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+    if (TargetAlign == 8 && (NumGPRSpills & 1)) {
+      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
+        for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
+          unsigned Reg = UnspilledCS1GPRs[i];
+          // Don't spill a high register if the function is Thumb.
+          if (!AFI->isThumbFunction() || isLowRegister(Reg) || Reg == ARM::LR) {
+            MF.getRegInfo().setPhysRegUsed(Reg);
+            AFI->setCSRegisterIsSpilled(Reg);
+            if (!isReservedReg(MF, Reg))
+              ExtraCSSpill = true;
+            break;
+          }
+        }
+      } else if (!UnspilledCS2GPRs.empty() &&
+                 !AFI->isThumbFunction()) {
+        unsigned Reg = UnspilledCS2GPRs.front();
+        MF.getRegInfo().setPhysRegUsed(Reg);
+        AFI->setCSRegisterIsSpilled(Reg);
+        if (!isReservedReg(MF, Reg))
+          ExtraCSSpill = true;
+      }
+    }
+
+    // Estimate if we might need to scavenge a register at some point in order
+    // to materialize a stack offset. If so, either spill one additional
+    // callee-saved register or reserve a special spill slot to facilitate
+    // register scavenging.
+    if (RS && !ExtraCSSpill && !AFI->isThumbFunction()) {
+      MachineFrameInfo *MFI = MF.getFrameInfo();
+      unsigned Size = estimateStackSize(MF, MFI);
+      unsigned Limit = (1 << 12) - 1;
+      for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+        for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+          for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+            if (I->getOperand(i).isFI()) {
+              unsigned Opcode = I->getOpcode();
+              const TargetInstrDesc &Desc = TII.get(Opcode);
+              unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+              if (AddrMode == ARMII::AddrMode3) {
+                Limit = (1 << 8) - 1;
+                goto DoneEstimating;
+              } else if (AddrMode == ARMII::AddrMode5) {
+                unsigned ThisLimit = ((1 << 8) - 1) * 4;
+                if (ThisLimit < Limit)
+                  Limit = ThisLimit;
+              }
+            }
+        }
+    DoneEstimating:
+      if (Size >= Limit) {
+        // If any non-reserved CS register isn't spilled, just spill one or two
+        // extra. That should take care of it!
+        unsigned NumExtras = TargetAlign / 4;
+        SmallVector<unsigned, 2> Extras;
+        while (NumExtras && !UnspilledCS1GPRs.empty()) {
+          unsigned Reg = UnspilledCS1GPRs.back();
+          UnspilledCS1GPRs.pop_back();
+          if (!isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+        while (NumExtras && !UnspilledCS2GPRs.empty()) {
+          unsigned Reg = UnspilledCS2GPRs.back();
+          UnspilledCS2GPRs.pop_back();
+          if (!isReservedReg(MF, Reg)) {
+            Extras.push_back(Reg);
+            NumExtras--;
+          }
+        }
+        if (Extras.size() && NumExtras == 0) {
+          for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+            MF.getRegInfo().setPhysRegUsed(Extras[i]);
+            AFI->setCSRegisterIsSpilled(Extras[i]);
+          }
+        } else {
+          // Reserve a slot closest to SP or frame pointer.
+          const TargetRegisterClass *RC = &ARM::GPRRegClass;
+          RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+                                                             RC->getAlignment()));
+        }
+      }
+    }
+  }
+
+  if (ForceLRSpill) {
+    MF.getRegInfo().setPhysRegUsed(ARM::LR);
+    AFI->setCSRegisterIsSpilled(ARM::LR);
+    AFI->setLRIsSpilledForFarJump(true);
+  }
+}
+
+/// Move the iterator past the next group of callee-save load / store ops for
+/// the particular spill area (1: integer area 1, 2: integer area 2,
+/// 3: fp area, 0: don't care).
+static void movePastCSLoadStoreOps(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator &MBBI,
+                                   int Opc, unsigned Area,
+                                   const ARMSubtarget &STI) {
+  while (MBBI != MBB.end() &&
+         MBBI->getOpcode() == Opc && MBBI->getOperand(1).isFI()) {
+    if (Area != 0) {
+      bool Done = false;
+      unsigned Category = 0;
+      switch (MBBI->getOperand(0).getReg()) {
+      case ARM::R4:  case ARM::R5:  case ARM::R6:  case ARM::R7:
+      case ARM::LR:
+        Category = 1;
+        break;
+      case ARM::R8:  case ARM::R9:  case ARM::R10: case ARM::R11:
+        Category = STI.isTargetDarwin() ? 2 : 1;
+        break;
+      case ARM::D8:  case ARM::D9:  case ARM::D10: case ARM::D11:
+      case ARM::D12: case ARM::D13: case ARM::D14: case ARM::D15:
+        Category = 3;
+        break;
+      default:
+        Done = true;
+        break;
+      }
+      if (Done || Category != Area)
+        break;
+    }
+
+    ++MBBI;
+  }
+}
+
+void ARMRegisterInfo::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  bool isThumb = AFI->isThumbFunction();
+  unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
+  unsigned NumBytes = MFI->getStackSize();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  DebugLoc dl = (MBBI != MBB.end() ?
+                 MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+  if (isThumb) {
+    // Check if R3 is live in. It might have to be used as a scratch register.
+    for (MachineRegisterInfo::livein_iterator I = MF.getRegInfo().livein_begin(),
+         E = MF.getRegInfo().livein_end(); I != E; ++I) {
+      if (I->first == ARM::R3) {
+        AFI->setR3IsLiveIn(true);
+        break;
+      }
+    }
+
+    // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
+    NumBytes = (NumBytes + 3) & ~3;
+    MFI->setStackSize(NumBytes);
+  }
+
+  // Determine the sizes of the callee-save spill areas and record which frame
+  // index belongs to which area.
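+  // Area 1 holds R4-R7 and LR (plus R8-R11 on non-Darwin targets), area 2
+  // holds R8-R11 on Darwin, and the DPR area holds D8-D15.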
+ unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + if (VARegSaveSize) + emitSPUpdate(MBB, MBBI, -VARegSaveSize, ARMCC::AL, 0, isThumb, TII, + *this, dl); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 1. + emitSPUpdate(MBB, MBBI, -GPRCS1Size, ARMCC::AL, 0, isThumb, TII, *this, dl); + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 1, STI); + } else if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { + ++MBBI; + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + } + + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if (STI.isTargetDarwin() || hasFP(MF)) { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(isThumb ? ARM::tADDrSPi : ARM::ADDri), + FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0); + if (!isThumb) AddDefaultCC(AddDefaultPred(MIB)); + } + + if (!isThumb) { + // Build the new SUBri to adjust SP for integer callee-save spill area 2. + emitSPUpdate(MBB, MBBI, -GPRCS2Size, ARMCC::AL, 0, false, TII, *this, dl); + + // Build the new SUBri to adjust SP for FP callee-save spill area. + movePastCSLoadStoreOps(MBB, MBBI, ARM::STR, 2, STI); + emitSPUpdate(MBB, MBBI, -DPRCSSize, ARMCC::AL, 0, false, TII, *this, dl); + } + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Insert it after all the callee-save spills. 
+ if (!isThumb) + movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 3, STI); + emitSPUpdate(MBB, MBBI, -NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + } + + if(STI.isTargetELF() && hasFP(MF)) { + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + } + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { + return ((MI->getOpcode() == ARM::FLDD || + MI->getOpcode() == ARM::LDR || + MI->getOpcode() == ARM::tRestore) && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); +} + +void ARMRegisterInfo::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = prior(MBB.end()); + assert((MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET) && + "Can only insert epilog into returning blocks"); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + bool isThumb = AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, *this, dl); + } else { + // Unwind MBBI to point to first LDR / FLDD. + const unsigned *CSRegs = getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, CSRegs)); + if (!isCSRestore(MBBI, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + if (isThumb) { + if (hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (NumBytes) + emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, FramePtr, -NumBytes, + TII, *this, dl); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVlor2hir), ARM::SP) + .addReg(FramePtr); + } else { + if (MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && + prior(MBBI)->getOpcode() == ARM::tPOP) { + MachineBasicBlock::iterator PMBBI = prior(MBBI); + emitSPUpdate(MBB, PMBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, + *this, dl); + } else + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, isThumb, TII, + *this, dl); + } + } else { + // Darwin ABI requires FP to point to the stack slot that contains the + // previous FP. + if ((STI.isTargetDarwin() && NumBytes) || hasFP(MF)) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. 
+ if (AFI->getGPRCalleeSavedArea2Size() || + AFI->getDPRCalleeSavedAreaSize() || + AFI->getDPRCalleeSavedAreaOffset()|| + hasFP(MF)) { + if (NumBytes) + BuildMI(MBB, MBBI, dl, TII.get(ARM::SUBri), ARM::SP).addReg(FramePtr) + .addImm(NumBytes) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP).addReg(FramePtr) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + } + } else if (NumBytes) { + emitSPUpdate(MBB, MBBI, NumBytes, ARMCC::AL, 0, false, TII, *this, dl); + } + + // Move SP to start of integer callee save spill area 2. + movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 3, STI); + emitSPUpdate(MBB, MBBI, AFI->getDPRCalleeSavedAreaSize(), ARMCC::AL, 0, + false, TII, *this, dl); + + // Move SP to start of integer callee save spill area 1. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 2, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea2Size(), ARMCC::AL, 0, + false, TII, *this, dl); + + // Move SP to SP upon entry to the function. + movePastCSLoadStoreOps(MBB, MBBI, ARM::LDR, 1, STI); + emitSPUpdate(MBB, MBBI, AFI->getGPRCalleeSavedArea1Size(), ARMCC::AL, 0, + false, TII, *this, dl); + } + } + + if (VARegSaveSize) { + if (isThumb) + // Epilogue for vararg functions: pop LR to R3 and branch off it. + // FIXME: Verify this is still ok when R3 is no longer being reserved. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)).addReg(ARM::R3); + + emitSPUpdate(MBB, MBBI, VARegSaveSize, ARMCC::AL, 0, isThumb, TII, + *this, dl); + + if (isThumb) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); + MBB.erase(MBBI); + } + } +} + +unsigned ARMRegisterInfo::getRARegister() const { + return ARM::LR; +} + +unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const { + if (STI.isTargetDarwin() || hasFP(MF)) + return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11; + else + return ARM::SP; +} + +unsigned ARMRegisterInfo::getEHExceptionRegister() const { + assert(0 && "What is the exception register"); + return 0; +} + +unsigned ARMRegisterInfo::getEHHandlerRegister() const { + assert(0 && "What is the exception handler register"); + return 0; +} + +int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { + return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); +} + +#include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h new file mode 100644 index 0000000..e1d9efb --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -0,0 +1,102 @@ +//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMREGISTERINFO_H +#define ARMREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "ARMGenRegisterInfo.h.inc" + +namespace llvm { + class ARMSubtarget; + class TargetInstrInfo; + class Type; + +struct ARMRegisterInfo : public ARMGenRegisterInfo { + const TargetInstrInfo &TII; + const ARMSubtarget &STI; +private: + /// FramePtr - ARM physical register used as frame ptr. 
+ unsigned FramePtr; + +public: + ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); + + /// emitLoadConstPool - Emits a load from constpool to materialize the + /// specified immediate. + void emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + unsigned DestReg, int Val, + unsigned Pred, unsigned PredReg, + const TargetInstrInfo *TII, bool isThumb, + DebugLoc dl) const; + + /// getRegisterNumbering - Given the enum value for some register, e.g. + /// ARM::LR, return the number that it corresponds to (e.g. 14). + static unsigned getRegisterNumbering(unsigned RegEnum); + + /// Same as previous getRegisterNumbering except it returns true in isSPVFP + /// if the register is a single precision VFP register. + static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); + + /// getPointerRegClass - Return the register class to use to hold pointers. + /// This is used for addressing modes. + const TargetRegisterClass *getPointerRegClass() const; + + /// Code Generation virtual methods... + const TargetRegisterClass * + getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + + bool requiresRegisterScavenging(const MachineFunction &MF) const; + + bool hasFP(const MachineFunction &MF) const; + + bool hasReservedCallFrame(MachineFunction &MF) const; + + void eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + + void eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, RegScavenger *RS = NULL) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + // Debug information queries. + unsigned getRARegister() const; + unsigned getFrameRegister(MachineFunction &MF) const; + + // Exception handling queries. + unsigned getEHExceptionRegister() const; + unsigned getEHHandlerRegister() const; + + int getDwarfRegNum(unsigned RegNum, bool isEH) const; + + bool isLowRegister(unsigned Reg) const; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td new file mode 100644 index 0000000..e8daf74 --- /dev/null +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -0,0 +1,221 @@ +//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Declarations that describe the ARM register file +//===----------------------------------------------------------------------===// + +// Registers are identified with 4-bit ID numbers. 
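+// (The 4-bit field matches the ARM register encoding; the single-precision
+// VFP registers S0-S31 need five bits, which is why the separate ARMFReg
+// class below carries a 5-bit number.)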
+class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> { + field bits<4> Num; + let Namespace = "ARM"; + let SubRegs = subregs; +} + +class ARMFReg<bits<5> num, string n> : Register<n> { + field bits<5> Num; + let Namespace = "ARM"; +} + +// Integer registers +def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>; +def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>; +def R2 : ARMReg< 2, "r2">, DwarfRegNum<[2]>; +def R3 : ARMReg< 3, "r3">, DwarfRegNum<[3]>; +def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>; +def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>; +def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>; +def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>; +def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>; +def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>; +def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : ARMReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>; +def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>; +def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>; +def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>; + +// Float registers +def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; +def S2 : ARMFReg< 2, "s2">; def S3 : ARMFReg< 3, "s3">; +def S4 : ARMFReg< 4, "s4">; def S5 : ARMFReg< 5, "s5">; +def S6 : ARMFReg< 6, "s6">; def S7 : ARMFReg< 7, "s7">; +def S8 : ARMFReg< 8, "s8">; def S9 : ARMFReg< 9, "s9">; +def S10 : ARMFReg<10, "s10">; def S11 : ARMFReg<11, "s11">; +def S12 : ARMFReg<12, "s12">; def S13 : ARMFReg<13, "s13">; +def S14 : ARMFReg<14, "s14">; def S15 : ARMFReg<15, "s15">; +def S16 : ARMFReg<16, "s16">; def S17 : ARMFReg<17, "s17">; +def S18 : ARMFReg<18, "s18">; def S19 : ARMFReg<19, "s19">; +def S20 : ARMFReg<20, "s20">; def S21 : ARMFReg<21, "s21">; +def S22 : ARMFReg<22, "s22">; def S23 : ARMFReg<23, "s23">; +def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">; +def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">; +def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">; +def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; + +// Aliases of the F* registers used to hold 64-bit fp values (doubles) +def D0 : ARMReg< 0, "d0", [S0, S1]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>; +def D9 : ARMReg< 9, "d9", [S18, S19]>; +def D10 : ARMReg<10, "d10", [S20, S21]>; +def D11 : ARMReg<11, "d11", [S22, S23]>; +def D12 : ARMReg<12, "d12", [S24, S25]>; +def D13 : ARMReg<13, "d13", [S26, S27]>; +def D14 : ARMReg<14, "d14", [S28, S29]>; +def D15 : ARMReg<15, "d15", [S30, S31]>; + +// Current Program Status Register. +def CPSR : ARMReg<0, "cpsr">; + +// Register classes. +// +// pc == Program Counter +// lr == Link Register +// sp == Stack Pointer +// r12 == ip (scratch) +// r7 == Frame Pointer (thumb-style backtraces) +// r11 == Frame Pointer (arm-style backtraces) +// r10 == Stack Limit +// +def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, + R7, R8, R9, R10, R12, R11, + LR, SP, PC]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + // FIXME: We are reserving r12 in case the PEI needs to use it to + // generate large stack offset. Make it available once we have register + // scavenging. Similarly r3 is reserved in Thumb mode for now. 
+ let MethodBodies = [{ + // FP is R11, R9 is available. + static const unsigned ARM_GPR_AO_1[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R9, ARM::R10, + ARM::R11 }; + // FP is R11, R9 is not available. + static const unsigned ARM_GPR_AO_2[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, ARM::R7, + ARM::R8, ARM::R10, + ARM::R11 }; + // FP is R7, R9 is available. + static const unsigned ARM_GPR_AO_3[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, + ARM::R8, ARM::R9, ARM::R10,ARM::R11, + ARM::R7 }; + // FP is R7, R9 is not available. + static const unsigned ARM_GPR_AO_4[] = { + ARM::R3, ARM::R2, ARM::R1, ARM::R0, + ARM::R12,ARM::LR, + ARM::R4, ARM::R5, ARM::R6, + ARM::R8, ARM::R10,ARM::R11, + ARM::R7 }; + + GPRClass::iterator + GPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isR9Reserved()) + return ARM_GPR_AO_4; + else + return ARM_GPR_AO_3; + } else { + if (Subtarget.isR9Reserved()) + return ARM_GPR_AO_2; + else + return ARM_GPR_AO_1; + } + } + + GPRClass::iterator + GPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + GPRClass::iterator I; + + if (Subtarget.useThumbBacktraces()) { + if (Subtarget.isR9Reserved()) { + I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); + } else { + I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned)); + } + } else { + if (Subtarget.isR9Reserved()) { + I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned)); + } else { + I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned)); + } + } + + // Mac OS X requires FP not to be clobbered for backtracing purpose. + return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + } + }]; +} + +// Thumb registers are R0-R7 normally. Some instructions can still use +// the general GPR register class above (MOV, e.g.) +def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it + // to generate large stack offset. Make it available once we have register + // scavenging. + let MethodBodies = [{ + static const unsigned THUMB_tGPR_AO[] = { + ARM::R2, ARM::R1, ARM::R0, + ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; + + // FP is R7, only low registers available. + tGPRClass::iterator + tGPRClass::allocation_order_begin(const MachineFunction &MF) const { + return THUMB_tGPR_AO; + } + + tGPRClass::iterator + tGPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + tGPRClass::iterator I = + THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned)); + // Mac OS X requires FP not to be clobbered for backtracing purpose. + return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? 
I-1 : I; + } + }]; +} + +def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, + S23, S24, S25, S26, S27, S28, S29, S30, S31]>; + +// ARM requires only word alignment for double. It's more performant if it +// is double-word alignment though. +def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, + D9, D10, D11, D12, D13, D14, D15]>; + +// Condition code registers. +def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h new file mode 100644 index 0000000..2cc2950 --- /dev/null +++ b/lib/Target/ARM/ARMRelocations.h @@ -0,0 +1,56 @@ +//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM target-specific relocation types. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMRELOCATIONS_H +#define ARMRELOCATIONS_H + +#include "llvm/CodeGen/MachineRelocation.h" + +namespace llvm { + namespace ARM { + enum RelocationType { + // reloc_arm_absolute - Absolute relocation, just add the relocated value + // to the value already in memory. + reloc_arm_absolute, + + // reloc_arm_relative - PC relative relocation, add the relocated value to + // the value already in memory, after we adjust it for where the PC is. + reloc_arm_relative, + + // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose + // addresses are kept locally in a map. + reloc_arm_cp_entry, + + // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset + // should be divided by 4. + reloc_arm_vfp_cp_entry, + + // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool + // entry. + reloc_arm_machine_cp_entry, + + // reloc_arm_jt_base - PC relative relocation for jump tables whose + // addresses are kept locally in a map. + reloc_arm_jt_base, + + // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base. + reloc_arm_pic_jt, + + // reloc_arm_branch - Branch address relocation. + reloc_arm_branch + }; + } +} + +#endif + diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp new file mode 100644 index 0000000..ef78cd5 --- /dev/null +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -0,0 +1,84 @@ +//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#include "ARMSubtarget.h" +#include "ARMGenSubtarget.inc" +#include "llvm/Module.h" +using namespace llvm; + +ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, + bool isThumb) + : ARMArchVersion(V4T) + , ARMFPUType(None) + , IsThumb(isThumb) + , ThumbMode(Thumb1) + , UseThumbBacktraces(false) + , IsR9Reserved(false) + , stackAlignment(4) + , CPUString("generic") + , TargetType(isELF) // Default to ELF unless otherwise specified. 
+ , TargetABI(ARM_ABI_APCS) { + // Determine default and user specified characteristics + + // Parse features string. + CPUString = ParseSubtargetFeatures(FS, CPUString); + + // Set the boolean corresponding to the current target triple, or the default + // if one cannot be determined, to true. + const std::string& TT = M.getTargetTriple(); + unsigned Len = TT.length(); + unsigned Idx = 0; + + if (Len >= 5 && TT.substr(0, 4) == "armv") + Idx = 4; + else if (Len >= 6 && TT.substr(0, 6) == "thumb") { + IsThumb = true; + if (Len >= 7 && TT[5] == 'v') + Idx = 6; + } + if (Idx) { + unsigned SubVer = TT[Idx]; + if (SubVer > '4' && SubVer <= '9') { + if (SubVer >= '7') + ARMArchVersion = V7A; + else if (SubVer == '6') + ARMArchVersion = V6; + else if (SubVer == '5') { + ARMArchVersion = V5T; + if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') + ARMArchVersion = V5TE; + } + } + } + + if (Len >= 10) { + if (TT.find("-darwin") != std::string::npos) + // arm-darwin + TargetType = isDarwin; + } else if (TT.empty()) { +#if defined(__APPLE__) + TargetType = isDarwin; +#endif + } + + if (TT.find("eabi") != std::string::npos) + TargetABI = ARM_ABI_AAPCS; + + if (isAAPCS_ABI()) + stackAlignment = 8; + + if (isTargetDarwin()) { + UseThumbBacktraces = true; + IsR9Reserved = true; + } +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h new file mode 100644 index 0000000..8b469cf --- /dev/null +++ b/lib/Target/ARM/ARMSubtarget.h @@ -0,0 +1,122 @@ +//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ARM specific subclass of TargetSubtarget. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSUBTARGET_H +#define ARMSUBTARGET_H + +#include "llvm/Target/TargetSubtarget.h" +#include <string> + +namespace llvm { +class Module; + +class ARMSubtarget : public TargetSubtarget { +protected: + enum ARMArchEnum { + V4T, V5T, V5TE, V6, V7A + }; + + enum ARMFPEnum { + None, VFPv2, VFPv3, NEON + }; + + enum ThumbTypeEnum { + Thumb1, + Thumb2 + }; + + /// ARMArchVersion - ARM architecture version: V4T (base), V5T, V5TE, + /// V6, V6T2, V7A. + ARMArchEnum ARMArchVersion; + + /// ARMFPUType - Floating Point Unit type. + ARMFPEnum ARMFPUType; + + /// IsThumb - True if we are in thumb mode, false if in ARM mode. + bool IsThumb; + + /// ThumbMode - Indicates supported Thumb version. + ThumbTypeEnum ThumbMode; + + /// UseThumbBacktraces - True if we use thumb style backtraces. + bool UseThumbBacktraces; + + /// IsR9Reserved - True if R9 is a not available as general purpose register. + bool IsR9Reserved; + + /// stackAlignment - The minimum alignment known to hold of the stack frame on + /// entry to the function and which must be maintained by every function. + unsigned stackAlignment; + + /// CPUString - String name of used CPU. + std::string CPUString; + + public: + enum { + isELF, isDarwin + } TargetType; + + enum { + ARM_ABI_APCS, + ARM_ABI_AAPCS // ARM EABI + } TargetABI; + + /// This constructor initializes the data members to match that + /// of the specified module. 
+ /// + ARMSubtarget(const Module &M, const std::string &FS, bool isThumb); + + /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size + /// that still makes it profitable to inline the call. + unsigned getMaxInlineSizeThreshold() const { + // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. + // Change this once Thumb ldmia / stmia support is added. + return isThumb() ? 0 : 64; + } + /// ParseSubtargetFeatures - Parses features string setting specified + /// subtarget options. Definition of function is auto generated by tblgen. + std::string ParseSubtargetFeatures(const std::string &FS, + const std::string &CPU); + + bool hasV4TOps() const { return ARMArchVersion >= V4T; } + bool hasV5TOps() const { return ARMArchVersion >= V5T; } + bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } + bool hasV6Ops() const { return ARMArchVersion >= V6; } + bool hasV7Ops() const { return ARMArchVersion >= V7A; } + + bool hasVFP2() const { return ARMFPUType >= VFPv2; } + bool hasVFP3() const { return ARMFPUType >= VFPv3; } + bool hasNEON() const { return ARMFPUType >= NEON; } + + bool isTargetDarwin() const { return TargetType == isDarwin; } + bool isTargetELF() const { return TargetType == isELF; } + + bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } + bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } + + bool isThumb() const { return IsThumb; } + bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } + + bool useThumbBacktraces() const { return UseThumbBacktraces; } + bool isR9Reserved() const { return IsR9Reserved; } + + const std::string & getCPUString() const { return CPUString; } + + /// getStackAlignment - Returns the minimum alignment known to hold of the + /// stack frame on entry to the function and which must be maintained by every + /// function for this subtarget. + unsigned getStackAlignment() const { return stackAlignment; } +}; +} // End llvm namespace + +#endif // ARMSUBTARGET_H diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp new file mode 100644 index 0000000..4107dcc --- /dev/null +++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp @@ -0,0 +1,291 @@ +//===-- ARMTargetAsmInfo.cpp - ARM asm properties ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the ARMTargetAsmInfo properties. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMTargetAsmInfo.h" +#include "ARMTargetMachine.h" +#include <cstring> +#include <cctype> +using namespace llvm; + + +const char *const llvm::arm_asm_table[] = { + "{r0}", "r0", + "{r1}", "r1", + "{r2}", "r2", + "{r3}", "r3", + "{r4}", "r4", + "{r5}", "r5", + "{r6}", "r6", + "{r7}", "r7", + "{r8}", "r8", + "{r9}", "r9", + "{r10}", "r10", + "{r11}", "r11", + "{r12}", "r12", + "{r13}", "r13", + "{r14}", "r14", + "{lr}", "lr", + "{sp}", "sp", + "{ip}", "ip", + "{fp}", "fp", + "{sl}", "sl", + "{memory}", "memory", + "{cc}", "cc", + 0,0}; + +ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM): + ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LessPrivateGlobalPrefix = "l"; + StringConstantPrefix = "\1LC"; + BSSSection = 0; // no BSS section + ZeroDirective = "\t.space\t"; + ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill + SetDirective = "\t.set\t"; + WeakRefDirective = "\t.weak_reference\t"; + WeakDefDirective = "\t.weak_definition "; + HiddenDirective = "\t.private_extern\t"; + ProtectedDirective = NULL; + JumpTableDataSection = ".const"; + CStringSection = "\t.cstring"; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + NeedsIndirectEncoding = true; + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorsSection = ".constructor"; + StaticDtorsSection = ".destructor"; + } else { + StaticCtorsSection = ".mod_init_func"; + StaticDtorsSection = ".mod_term_func"; + } + + // In non-PIC modes, emit a special label before jump tables so that the + // linker can perform more accurate dead code stripping. + if (TM.getRelocationModel() != Reloc::PIC_) { + // Emit a local label that is preserved until the linker runs. 
+ JumpTableSpecialLabelPrefix = "l"; + } + + NeedsSet = true; + DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug"; + DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug"; + DwarfLineSection = ".section __DWARF,__debug_line,regular,debug"; + DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug"; + DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug"; + DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug"; + DwarfStrSection = ".section __DWARF,__debug_str,regular,debug"; + DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug"; + DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug"; + DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug"; + DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug"; +} + +ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM): + ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + + NeedsSet = false; + HasLEB128 = true; + AbsoluteDebugSectionOffsets = true; + CStringSection = ".rodata.str"; + PrivateGlobalPrefix = ".L"; + WeakRefDirective = "\t.weak\t"; + SetDirective = "\t.set\t"; + DwarfRequiresFrameSection = false; + DwarfAbbrevSection = "\t.section\t.debug_abbrev,\"\",%progbits"; + DwarfInfoSection = "\t.section\t.debug_info,\"\",%progbits"; + DwarfLineSection = "\t.section\t.debug_line,\"\",%progbits"; + DwarfFrameSection = "\t.section\t.debug_frame,\"\",%progbits"; + DwarfPubNamesSection ="\t.section\t.debug_pubnames,\"\",%progbits"; + DwarfPubTypesSection ="\t.section\t.debug_pubtypes,\"\",%progbits"; + DwarfStrSection = "\t.section\t.debug_str,\"\",%progbits"; + DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits"; + DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits"; + DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits"; + DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits"; + + if (Subtarget->isAAPCS_ABI()) { + StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array"; + StaticDtorsSection = "\t.section .fini_array,\"aw\",%fini_array"; + } else { + StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits"; + StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits"; + } +} + +/// Count the number of comma-separated arguments. +/// Do not try to detect errors. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::countArguments(const char* p) const { + unsigned count = 0; + while (*p && isspace(*p) && *p != '\n') + p++; + count++; + while (*p && *p!='\n' && + strncmp(p, BaseTAI::CommentString, + strlen(BaseTAI::CommentString))!=0) { + if (*p==',') + count++; + p++; + } + return count; +} + +/// Count the length of a string enclosed in quote characters. +/// Do not try to detect errors. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::countString(const char* p) const { + unsigned count = 0; + while (*p && isspace(*p) && *p!='\n') + p++; + if (!*p || *p != '\"') + return count; + while (*++p && *p != '\"') + count++; + return count; +} + +/// ARM-specific version of TargetAsmInfo::getInlineAsmLength. +template <class BaseTAI> +unsigned ARMTargetAsmInfo<BaseTAI>::getInlineAsmLength(const char *s) const { + // Make a lowercase-folded version of s for counting purposes. + char *q, *s_copy = (char *)malloc(strlen(s) + 1); + strcpy(s_copy, s); + for (q=s_copy; *q; q++) + *q = tolower(*q); + const char *Str = s_copy; + + // Count the number of bytes in the asm. 
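+  // Scan the string, tracking instruction boundaries and whether we are still
+  // in the text section; only text-section contents contribute to the
+  // returned length.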
+ bool atInsnStart = true; + bool inTextSection = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (atInsnStart) { + // Skip whitespace + while (*Str && isspace(*Str) && *Str != '\n') + Str++; + // Skip label + for (const char* p = Str; *p && !isspace(*p); p++) + if (*p == ':') { + Str = p+1; + while (*Str && isspace(*Str) && *Str != '\n') + Str++; + break; + } + + if (*Str == 0) break; + + // Ignore everything from comment char(s) to EOL + if (strncmp(Str, BaseTAI::CommentString, + strlen(BaseTAI::CommentString)) == 0) + atInsnStart = false; + // FIXME do something like the following for non-Darwin + else if (*Str == '.' && Subtarget->isTargetDarwin()) { + // Directive. + atInsnStart = false; + + // Some change the section, but don't generate code. + if (strncmp(Str, ".literal4", strlen(".literal4"))==0 || + strncmp(Str, ".literal8", strlen(".literal8"))==0 || + strncmp(Str, ".const", strlen(".const"))==0 || + strncmp(Str, ".constructor", strlen(".constructor"))==0 || + strncmp(Str, ".cstring", strlen(".cstring"))==0 || + strncmp(Str, ".data", strlen(".data"))==0 || + strncmp(Str, ".destructor", strlen(".destructor"))==0 || + strncmp(Str, ".fvmlib_init0", strlen(".fvmlib_init0"))==0 || + strncmp(Str, ".fvmlib_init1", strlen(".fvmlib_init1"))==0 || + strncmp(Str, ".mod_init_func", strlen(".mod_init_func"))==0 || + strncmp(Str, ".mod_term_func", strlen(".mod_term_func"))==0 || + strncmp(Str, ".picsymbol_stub", strlen(".picsymbol_stub"))==0 || + strncmp(Str, ".symbol_stub", strlen(".symbol_stub"))==0 || + strncmp(Str, ".static_data", strlen(".static_data"))==0 || + strncmp(Str, ".section", strlen(".section"))==0 || + strncmp(Str, ".lazy_symbol_pointer", strlen(".lazy_symbol_pointer"))==0 || + strncmp(Str, ".non_lazy_symbol_pointer", strlen(".non_lazy_symbol_pointer"))==0 || + strncmp(Str, ".dyld", strlen(".dyld"))==0 || + strncmp(Str, ".const_data", strlen(".const_data"))==0 || + strncmp(Str, ".objc", strlen(".objc"))==0 || //// many directives + strncmp(Str, ".static_const", strlen(".static_const"))==0) + inTextSection=false; + else if (strncmp(Str, ".text", strlen(".text"))==0) + inTextSection = true; + // Some can't really be handled without implementing significant pieces + // of an assembler. Others require dynamic adjustment of block sizes in + // AdjustBBOffsetsAfter; it's a big compile-time speed hit to check every + // instruction in there, and none of these are currently used in the kernel. + else if (strncmp(Str, ".macro", strlen(".macro"))==0 || + strncmp(Str, ".if", strlen(".if"))==0 || + strncmp(Str, ".align", strlen(".align"))==0 || + strncmp(Str, ".fill", strlen(".fill"))==0 || + strncmp(Str, ".space", strlen(".space"))==0 || + strncmp(Str, ".zerofill", strlen(".zerofill"))==0 || + strncmp(Str, ".p2align", strlen(".p2align"))==0 || + strncmp(Str, ".p2alignw", strlen(".p2alignw"))==0 || + strncmp(Str, ".p2alignl", strlen(".p2alignl"))==0 || + strncmp(Str, ".align32", strlen(".p2align32"))==0 || + strncmp(Str, ".include", strlen(".include"))==0) + cerr << "Directive " << Str << " in asm may lead to invalid offsets for" << + " constant pools (the assembler will tell you if this happens).\n"; + // Some generate code, but this is only interesting in the text section. 
+ else if (inTextSection) { + if (strncmp(Str, ".long", strlen(".long"))==0) + Length += 4*countArguments(Str+strlen(".long")); + else if (strncmp(Str, ".short", strlen(".short"))==0) + Length += 2*countArguments(Str+strlen(".short")); + else if (strncmp(Str, ".byte", strlen(".byte"))==0) + Length += 1*countArguments(Str+strlen(".byte")); + else if (strncmp(Str, ".single", strlen(".single"))==0) + Length += 4*countArguments(Str+strlen(".single")); + else if (strncmp(Str, ".double", strlen(".double"))==0) + Length += 8*countArguments(Str+strlen(".double")); + else if (strncmp(Str, ".quad", strlen(".quad"))==0) + Length += 16*countArguments(Str+strlen(".quad")); + else if (strncmp(Str, ".ascii", strlen(".ascii"))==0) + Length += countString(Str+strlen(".ascii")); + else if (strncmp(Str, ".asciz", strlen(".asciz"))==0) + Length += countString(Str+strlen(".asciz"))+1; + } + } else if (inTextSection) { + // An instruction + atInsnStart = false; + if (Subtarget->isThumb()) { + // BL and BLX <non-reg> are 4 bytes, all others 2. + if (strncmp(Str, "blx", strlen("blx"))==0) { + const char* p = Str+3; + while (*p && isspace(*p)) + p++; + if (*p == 'r' || *p=='R') + Length += 2; // BLX reg + else + Length += 4; // BLX non-reg + } else if (strncmp(Str, "bl", strlen("bl"))==0) + Length += 4; // BL + else + Length += 2; // Thumb anything else + } + else + Length += 4; // ARM + } + } + if (*Str == '\n' || *Str == BaseTAI::SeparatorChar) + atInsnStart = true; + } + free(s_copy); + return Length; +} + +// Instantiate default implementation. +TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>); diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h new file mode 100644 index 0000000..9e6f856 --- /dev/null +++ b/lib/Target/ARM/ARMTargetAsmInfo.h @@ -0,0 +1,64 @@ +//=====-- ARMTargetAsmInfo.h - ARM asm properties -------------*- C++ -*--====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the ARMTargetAsmInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMTARGETASMINFO_H +#define ARMTARGETASMINFO_H + +#include "ARMTargetMachine.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/ELFTargetAsmInfo.h" +#include "llvm/Target/DarwinTargetAsmInfo.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + + extern const char *const arm_asm_table[]; + + template <class BaseTAI> + struct ARMTargetAsmInfo : public BaseTAI { + explicit ARMTargetAsmInfo(const ARMTargetMachine &TM): + BaseTAI(TM) { + BaseTAI::AsmTransCBE = arm_asm_table; + + BaseTAI::AlignmentIsInBytes = false; + BaseTAI::Data64bitsDirective = 0; + BaseTAI::CommentString = "@"; + BaseTAI::ConstantPoolSection = "\t.text\n"; + BaseTAI::COMMDirectiveTakesAlignment = false; + BaseTAI::InlineAsmStart = "@ InlineAsm Start"; + BaseTAI::InlineAsmEnd = "@ InlineAsm End"; + BaseTAI::LCOMMDirective = "\t.lcomm\t"; + } + + const ARMSubtarget *Subtarget; + + virtual unsigned getInlineAsmLength(const char *Str) const; + unsigned countArguments(const char *p) const; + unsigned countString(const char *p) const; + }; + + typedef ARMTargetAsmInfo<TargetAsmInfo> ARMGenericTargetAsmInfo; + + EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>); + + struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> { + explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM); + }; + + struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> { + explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM); + }; + +} // namespace llvm + +#endif diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp new file mode 100644 index 0000000..1dc7d19 --- /dev/null +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -0,0 +1,242 @@ +//===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "ARMTargetMachine.h" +#include "ARMTargetAsmInfo.h" +#include "ARMFrameInfo.h" +#include "ARM.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachineRegistry.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, + cl::desc("Disable load store optimization pass")); +static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, + cl::desc("Disable if-conversion pass")); + +/// ARMTargetMachineModule - Note that this is used on hosts that cannot link +/// in a library unless there are references into the library. In particular, +/// it seems that it is not possible to get things to work on Win32 without +/// this. Though it is unused, do not remove it. +extern "C" int ARMTargetMachineModule; +int ARMTargetMachineModule = 0; + +// Register the target. 
+static RegisterTarget<ARMTargetMachine> X("arm", "ARM"); +static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb"); + +// No assembler printer by default +ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0; + +/// ThumbTargetMachine - Create an Thumb architecture model. +/// +unsigned ThumbTargetMachine::getJITMatchQuality() { +#if defined(__thumb__) + return 10; +#endif + return 0; +} + +unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + // Match thumb-foo-bar, as well as things like thumbv5blah-* + if (TT.size() >= 6 && + (TT.substr(0, 6) == "thumb-" || TT.substr(0, 6) == "thumbv")) + return 20; + + // If the target triple is something non-thumb, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + +ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS) + : ARMTargetMachine(M, FS, true) { +} + +/// TargetMachine ctor - Create an ARM architecture model. +/// +ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS, + bool isThumb) + : Subtarget(M, FS, isThumb), + DataLayout(Subtarget.isAPCS_ABI() ? + // APCS ABI + (isThumb ? + std::string("e-p:32:32-f64:32:32-i64:32:32-" + "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + std::string("e-p:32:32-f64:32:32-i64:32:32")) : + // AAPCS ABI + (isThumb ? + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + std::string("e-p:32:32-f64:64:64-i64:64:64"))), + InstrInfo(Subtarget), + FrameInfo(Subtarget), + JITInfo(), + TLInfo(*this) { + DefRelocModel = getRelocationModel(); +} + +unsigned ARMTargetMachine::getJITMatchQuality() { +#if defined(__arm__) + return 10; +#endif + return 0; +} + +unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) { + std::string TT = M.getTargetTriple(); + // Match arm-foo-bar, as well as things like armv5blah-* + if (TT.size() >= 4 && + (TT.substr(0, 4) == "arm-" || TT.substr(0, 4) == "armv")) + return 20; + // If the target triple is something non-arm, we don't match. + if (!TT.empty()) return 0; + + if (M.getEndianness() == Module::LittleEndian && + M.getPointerSize() == Module::Pointer32) + return 10; // Weak match + else if (M.getEndianness() != Module::AnyEndianness || + M.getPointerSize() != Module::AnyPointerSize) + return 0; // Match for some other target + + return getJITMatchQuality()/2; +} + + +const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const { + switch (Subtarget.TargetType) { + case ARMSubtarget::isDarwin: + return new ARMDarwinTargetAsmInfo(*this); + case ARMSubtarget::isELF: + return new ARMELFTargetAsmInfo(*this); + default: + return new ARMGenericTargetAsmInfo(*this); + } +} + + +// Pass Pipeline Configuration +bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + PM.add(createARMISelDag(*this)); + return false; +} + +bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + // FIXME: temporarily disabling load / store optimization pass for Thumb mode. 
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + PM.add(createARMLoadStoreOptimizationPass()); + + if (OptLevel != CodeGenOpt::None && + !DisableIfConversion && !Subtarget.isThumb()) + PM.add(createIfConverterPass()); + + PM.add(createARMConstantIslandPass()); + return true; +} + +bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, + raw_ostream &Out) { + // Output assembly language. + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + + return false; +} + + +bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // FIXME: Move this to TargetJITInfo! + if (DefRelocModel == Reloc::Default) + setRelocationModel(Reloc::Static); + + // Machine code emitter pass for ARM. + PM.add(createARMCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // FIXME: Move this to TargetJITInfo! + if (DefRelocModel == Reloc::Default) + setRelocationModel(Reloc::Static); + + // Machine code emitter pass for ARM. + PM.add(createARMJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE) { + // Machine code emitter pass for ARM. + PM.add(createARMCodeEmitterPass(*this, MCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + +bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &JCE) { + // Machine code emitter pass for ARM. + PM.add(createARMJITCodeEmitterPass(*this, JCE)); + if (DumpAsm) { + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); + if (AsmPrinterCtor) + PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + } + + return false; +} + + diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h new file mode 100644 index 0000000..916a8aa --- /dev/null +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -0,0 +1,104 @@ +//===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ARM specific subclass of TargetMachine. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMTARGETMACHINE_H +#define ARMTARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "ARMInstrInfo.h" +#include "ARMFrameInfo.h" +#include "ARMJITInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" + +namespace llvm { + +class Module; + +class ARMTargetMachine : public LLVMTargetMachine { + ARMSubtarget Subtarget; + const TargetData DataLayout; // Calculates type size & alignment + ARMInstrInfo InstrInfo; + ARMFrameInfo FrameInfo; + ARMJITInfo JITInfo; + ARMTargetLowering TLInfo; + Reloc::Model DefRelocModel; // Reloc model before it's overridden. + +protected: + // To avoid having target depend on the asmprinter stuff libraries, asmprinter + // set this functions to ctor pointer at startup time if they are linked in. + typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, + ARMTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose); + static AsmPrinterCtorFn AsmPrinterCtor; + +public: + ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false); + + virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; } + virtual ARMJITInfo *getJITInfo() { return &JITInfo; } + virtual const ARMRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const TargetData *getTargetData() const { return &DataLayout; } + virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } + virtual ARMTargetLowering *getTargetLowering() const { + return const_cast<ARMTargetLowering*>(&TLInfo); + } + + static void registerAsmPrinter(AsmPrinterCtorFn F) { + AsmPrinterCtor = F; + } + + static unsigned getModuleMatchQuality(const Module &M); + static unsigned getJITMatchQuality(); + + virtual const TargetAsmInfo *createTargetAsmInfo() const; + + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addAssemblyEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool Verbose, raw_ostream &Out); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, MachineCodeEmitter &MCE); + virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, + bool DumpAsm, JITCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + MachineCodeEmitter &MCE); + virtual bool addSimpleCodeEmitter(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DumpAsm, + JITCodeEmitter &MCE); +}; + +/// ThumbTargetMachine - Thumb target machine. 
+/// +class ThumbTargetMachine : public ARMTargetMachine { +public: + ThumbTargetMachine(const Module &M, const std::string &FS); + + static unsigned getJITMatchQuality(); + static unsigned getModuleMatchQuality(const Module &M); +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp new file mode 100644 index 0000000..d908cf4 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -0,0 +1,1117 @@ +//===-- ARMAsmPrinter.cpp - ARM LLVM assembly writer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format ARM assembly language. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARM.h" +#include "ARMBuildAttrs.h" +#include "ARMTargetMachine.h" +#include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DwarfWriter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Mangler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +using namespace llvm; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +namespace { + class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { + DwarfWriter *DW; + MachineModuleInfo *MMI; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when printing asm code for different targets. + const ARMSubtarget *Subtarget; + + /// AFI - Keep a pointer to ARMFunctionInfo for the current + /// MachineFunction. + ARMFunctionInfo *AFI; + + /// MCP - Keep a pointer to constantpool entries of the current + /// MachineFunction. + const MachineConstantPool *MCP; + + /// We name each basic block in a Function with a unique number, so + /// that we can consistently refer to them later. This is cleared + /// at the beginning of each call to runOnMachineFunction(). + /// + typedef std::map<const Value *, unsigned> ValueMapTy; + ValueMapTy NumberForBB; + + /// GVNonLazyPtrs - Keeps the set of GlobalValues that require + /// non-lazy-pointers for indirect access. + StringSet<> GVNonLazyPtrs; + + /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden + /// visibility that require non-lazy-pointers for indirect access. + StringSet<> HiddenGVNonLazyPtrs; + + /// FnStubs - Keeps the set of external function GlobalAddresses that the + /// asm printer should generate stubs for. + StringSet<> FnStubs; + + /// True if asm printer is printing a series of CONSTPOOL_ENTRY. 
+ bool InCPMode; + public: + explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM, + const TargetAsmInfo *T, CodeGenOpt::Level OL, + bool V) + : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL), + InCPMode(false) { + Subtarget = &TM.getSubtarget<ARMSubtarget>(); + } + + virtual const char *getPassName() const { + return "ARM Assembly Printer"; + } + + void printOperand(const MachineInstr *MI, int opNum, + const char *Modifier = 0); + void printSOImmOperand(const MachineInstr *MI, int opNum); + void printSOImm2PartOperand(const MachineInstr *MI, int opNum); + void printSORegOperand(const MachineInstr *MI, int opNum); + void printAddrMode2Operand(const MachineInstr *MI, int OpNo); + void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo); + void printAddrMode3Operand(const MachineInstr *MI, int OpNo); + void printAddrMode3OffsetOperand(const MachineInstr *MI, int OpNo); + void printAddrMode4Operand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printAddrMode5Operand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printAddrModePCOperand(const MachineInstr *MI, int OpNo, + const char *Modifier = 0); + void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo, + unsigned Scale); + void printThumbAddrModeS1Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo); + void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo); + void printPredicateOperand(const MachineInstr *MI, int opNum); + void printSBitModifierOperand(const MachineInstr *MI, int opNum); + void printPCLabel(const MachineInstr *MI, int opNum); + void printRegisterList(const MachineInstr *MI, int opNum); + void printCPInstOperand(const MachineInstr *MI, int opNum, + const char *Modifier); + void printJTBlockOperand(const MachineInstr *MI, int opNum); + + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode); + virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode); + + void printModuleLevelGV(const GlobalVariable* GVar); + bool printInstruction(const MachineInstr *MI); // autogenerated. + void printMachineInstruction(const MachineInstr *MI); + bool runOnMachineFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool doFinalization(Module &M); + + /// EmitMachineConstantPoolValue - Print a machine constantpool value to + /// the .s file. + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + printDataDirective(MCPV->getType()); + + ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); + GlobalValue *GV = ACPV->getGV(); + std::string Name = GV ? 
Mang->getValueName(GV) : TAI->getGlobalPrefix(); + if (!GV) + Name += ACPV->getSymbol(); + if (ACPV->isNonLazyPointer()) { + if (GV->hasHiddenVisibility()) + HiddenGVNonLazyPtrs.insert(Name); + else + GVNonLazyPtrs.insert(Name); + printSuffixedName(Name, "$non_lazy_ptr"); + } else if (ACPV->isStub()) { + FnStubs.insert(Name); + printSuffixedName(Name, "$stub"); + } else + O << Name; + if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; + if (ACPV->getPCAdjustment() != 0) { + O << "-(" << TAI->getPrivateGlobalPrefix() << "PC" + << utostr(ACPV->getLabelId()) + << "+" << (unsigned)ACPV->getPCAdjustment(); + if (ACPV->mustAddCurrentAddress()) + O << "-."; + O << ")"; + } + O << "\n"; + + // If the constant pool value is a extern weak symbol, remember to emit + // the weak reference. + if (GV && GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<DwarfWriter>(); + } + }; +} // end of anonymous namespace + +#include "ARMGenAsmWriter.inc" + +/// runOnMachineFunction - This uses the printInstruction() +/// method to print assembly for each instruction. +/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + AFI = MF.getInfo<ARMFunctionInfo>(); + MCP = MF.getConstantPool(); + + SetupMachineFunction(MF); + O << "\n"; + + // NOTE: we don't print out constant pools here, they are handled as + // instructions. + + O << "\n"; + // Print out labels for the function. + const Function *F = MF.getFunction(); + switch (F->getLinkage()) { + default: assert(0 && "Unknown linkage type!"); + case Function::PrivateLinkage: + case Function::InternalLinkage: + SwitchToTextSection("\t.text", F); + break; + case Function::ExternalLinkage: + SwitchToTextSection("\t.text", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + break; + case Function::WeakAnyLinkage: + case Function::WeakODRLinkage: + case Function::LinkOnceAnyLinkage: + case Function::LinkOnceODRLinkage: + if (Subtarget->isTargetDarwin()) { + SwitchToTextSection( + ".section __TEXT,__textcoal_nt,coalesced,pure_instructions", F); + O << "\t.globl\t" << CurrentFnName << "\n"; + O << "\t.weak_definition\t" << CurrentFnName << "\n"; + } else { + O << TAI->getWeakRefDirective() << CurrentFnName << "\n"; + } + break; + } + + printVisibility(CurrentFnName, F->getVisibility()); + + if (AFI->isThumbFunction()) { + EmitAlignment(1, F, AFI->getAlign()); + O << "\t.code\t16\n"; + O << "\t.thumb_func"; + if (Subtarget->isTargetDarwin()) + O << "\t" << CurrentFnName; + O << "\n"; + InCPMode = false; + } else + EmitAlignment(2, F); + + O << CurrentFnName << ":\n"; + // Emit pre-function debug information. + DW->BeginFunction(&MF); + + if (Subtarget->isTargetDarwin()) { + // If the function is empty, then we need to emit *something*. Otherwise, + // the function's label might be associated with something that it wasn't + // meant to be associated with. We emit a noop in this situation. + MachineFunction::iterator I = MF.begin(); + + if (++I == MF.end() && MF.front().empty()) + O << "\tnop\n"; + } + + // Print out code for the function. + for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + // Print a label for the basic block. 
+ if (I != MF.begin()) { + printBasicBlockLabel(I, true, true, VerboseAsm); + O << '\n'; + } + for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); + II != E; ++II) { + // Print the assembly for the instruction. + printMachineInstruction(II); + } + } + + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << CurrentFnName << ", .-" << CurrentFnName << "\n"; + + // Emit post-function debug information. + DW->EndFunction(&MF); + + O.flush(); + + return false; +} + +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum, + const char *Modifier) { + const MachineOperand &MO = MI->getOperand(opNum); + switch (MO.getType()) { + case MachineOperand::MO_Register: + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + O << TM.getRegisterInfo()->get(MO.getReg()).AsmName; + else + assert(0 && "not implemented"); + break; + case MachineOperand::MO_Immediate: { + if (!Modifier || strcmp(Modifier, "no_hash") != 0) + O << "#"; + + O << MO.getImm(); + break; + } + case MachineOperand::MO_MachineBasicBlock: + printBasicBlockLabel(MO.getMBB()); + return; + case MachineOperand::MO_GlobalAddress: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + GlobalValue *GV = MO.getGlobal(); + std::string Name = Mang->getValueName(GV); + bool isExt = (GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage()); + if (isExt && isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + + printOffset(MO.getOffset()); + + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + break; + } + case MachineOperand::MO_ExternalSymbol: { + bool isCallOp = Modifier && !strcmp(Modifier, "call"); + std::string Name(TAI->getGlobalPrefix()); + Name += MO.getSymbolName(); + if (isCallOp && Subtarget->isTargetDarwin() && + TM.getRelocationModel() != Reloc::Static) { + printSuffixedName(Name, "$stub"); + FnStubs.insert(Name); + } else + O << Name; + if (isCallOp && Subtarget->isTargetELF() && + TM.getRelocationModel() == Reloc::PIC_) + O << "(PLT)"; + break; + } + case MachineOperand::MO_ConstantPoolIndex: + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + case MachineOperand::MO_JumpTableIndex: + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + break; + default: + O << "<unknown operand type>"; abort (); break; + } +} + +static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, + const TargetAsmInfo *TAI) { + assert(V < (1 << 12) && "Not a valid so_imm value!"); + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (VerboseAsm) + O << ' ' << TAI->getCommentString() + << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. 
+void ARMAsmPrinter::printSOImmOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), VerboseAsm, TAI); +} + +/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' +/// followed by an 'orr' to materialize. +void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO.getImm()); + unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO.getImm()); + printSOImm(O, ARM_AM::getSOImmVal(V1), VerboseAsm, TAI); + O << "\n\torr"; + printPredicateOperand(MI, 2); + O << " "; + printOperand(MI, 0); + O << ", "; + printOperand(MI, 0); + O << ", "; + printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) + << " "; + + if (MO2.getReg()) { + assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); + O << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + +void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. 
+ O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ImmOffs; + return; + } + + O << (char)ARM_AM::getAM2Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (MO2.getReg()) { + O << ", " + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << TM.getRegisterInfo()->get(MO2.getReg()).AsmName + << "]"; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ImmOffs; + O << "]"; +} + +void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ImmOffs; +} + +void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Modifier && strcmp(Modifier, "submode") == 0) { + if (MO1.getReg() == ARM::SP) { + bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_RET); + O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + } else { + printOperand(MI, Op); + if (ARM_AM::getAM4WBFlag(MO2.getImm())) + O << "!"; + } +} + +void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, + const char *Modifier) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + + if (Modifier && strcmp(Modifier, "submode") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); + if (MO1.getReg() == ARM::SP) { + bool isFLDM = (MI->getOpcode() == ARM::FLDMD || + MI->getOpcode() == ARM::FLDMS); + O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + return; + } else if (Modifier && strcmp(Modifier, "base") == 0) { + // Used for FSTM{D|S} and LSTM{D|S} operations. + O << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, + const char *Modifier) { + if (Modifier && strcmp(Modifier, "label") == 0) { + printPCLabel(MI, Op+1); + return; + } + + const MachineOperand &MO1 = MI->getOperand(Op); + assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); + O << "[pc, +" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRROperand(const MachineInstr *MI, int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + O << ", " << TM.getRegisterInfo()->get(MO2.getReg()).AsmName << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, + unsigned Scale) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + const MachineOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op); + return; + } + + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (MO3.getReg()) + O << ", " << TM.getRegisterInfo()->get(MO3.getReg()).AsmName; + else if (unsigned ImmOffs = MO2.getImm()) { + O << ", #" << ImmOffs; + if (Scale > 1) + O << " * " << Scale; + } + O << "]"; +} + +void +ARMAsmPrinter::printThumbAddrModeS1Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 1); +} +void +ARMAsmPrinter::printThumbAddrModeS2Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 2); +} +void +ARMAsmPrinter::printThumbAddrModeS4Operand(const MachineInstr *MI, int Op) { + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { + const MachineOperand &MO1 = MI->getOperand(Op); + const MachineOperand &MO2 = MI->getOperand(Op+1); + O << "[" << TM.getRegisterInfo()->get(MO1.getReg()).AsmName; + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs << " * 4"; + O << "]"; +} + +void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int opNum){ + unsigned Reg = MI->getOperand(opNum).getReg(); + if (Reg) { + assert(Reg == ARM::CPSR && "Expect ARM CPSR register!"); + O << 's'; + } +} + +void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int opNum) { + int Id = (int)MI->getOperand(opNum).getImm(); + O << TAI->getPrivateGlobalPrefix() << "PC" << Id; +} + +void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int opNum) { + O << "{"; + for (unsigned i = opNum, e = MI->getNumOperands(); i != e; ++i) { + printOperand(MI, i); + if (i != e-1) O << ", "; + } + O << "}"; +} + +void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo, + const char *Modifier) { + assert(Modifier && "This operand only works with a modifier!"); + // There are two aspects to a CONSTANTPOOL_ENTRY operand, the label and the + // data itself. 
+ if (!strcmp(Modifier, "label")) { + unsigned ID = MI->getOperand(OpNo).getImm(); + O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << ID << ":\n"; + } else { + assert(!strcmp(Modifier, "cpentry") && "Unknown modifier for CPE"); + unsigned CPI = MI->getOperand(OpNo).getIndex(); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + + if (MCPE.isMachineConstantPoolEntry()) { + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + } else { + EmitGlobalConstant(MCPE.Val.ConstVal); + // remember to emit the weak reference + if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + } + } +} + +void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNo) { + const MachineOperand &MO1 = MI->getOperand(OpNo); + const MachineOperand &MO2 = MI->getOperand(OpNo+1); // Unique Id + unsigned JTI = MO1.getIndex(); + O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() << ":\n"; + + const char *JTEntryDirective = TAI->getJumpTableDirective(); + if (!JTEntryDirective) + JTEntryDirective = TAI->getData32bitsDirective(); + + const MachineFunction *MF = MI->getParent()->getParent(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + bool UseSet= TAI->getSetDirective() && TM.getRelocationModel() == Reloc::PIC_; + std::set<MachineBasicBlock*> JTSets; + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + if (UseSet && JTSets.insert(MBB).second) + printPICJumpTableSetLabel(JTI, MO2.getImm(), MBB); + + O << JTEntryDirective << ' '; + if (UseSet) + O << TAI->getPrivateGlobalPrefix() << getFunctionNumber() + << '_' << JTI << '_' << MO2.getImm() + << "_set_" << MBB->getNumber(); + else if (TM.getRelocationModel() == Reloc::PIC_) { + printBasicBlockLabel(MBB, false, false, false); + // If the arch uses custom Jump Table directives, don't calc relative to JT + if (!TAI->getJumpTableDirective()) + O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << JTI << '_' << MO2.getImm(); + } else + printBasicBlockLabel(MBB, false, false, false); + if (i != e-1) + O << '\n'; + } +} + + +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode){ + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // Don't print "#" before a global var name or constant. + case 'c': // Don't print "$" before a global var name or constant. + printOperand(MI, OpNo, "no_hash"); + return false; + case 'P': // Print a VFP double precision register. + printOperand(MI, OpNo); + return false; + case 'Q': + if (TM.getTargetData()->isLittleEndian()) + break; + // Fallthrough + case 'R': + if (TM.getTargetData()->isBigEndian()) + break; + // Fallthrough + case 'H': // Write second word of DI / DF reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. 
+ } + } + + printOperand(MI, OpNo); + return false; +} + +bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printAddrMode2Operand(MI, OpNo); + return false; +} + +void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { + ++EmittedInsts; + + int Opc = MI->getOpcode(); + switch (Opc) { + case ARM::CONSTPOOL_ENTRY: + if (!InCPMode && AFI->isThumbFunction()) { + EmitAlignment(2); + InCPMode = true; + } + break; + default: { + if (InCPMode && AFI->isThumbFunction()) + InCPMode = false; + }} + + // Call the autogenerated instruction printer routines. + printInstruction(MI); +} + +bool ARMAsmPrinter::doInitialization(Module &M) { + + bool Result = AsmPrinter::doInitialization(M); + + // Emit initial debug information. + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI); + DW = getAnalysisIfAvailable<DwarfWriter>(); + assert(DW && "Dwarf Writer is not available"); + DW->BeginModule(&M, MMI, O, this, TAI); + + // Darwin wants symbols to be quoted if they have complex names. + if (Subtarget->isTargetDarwin()) + Mang->setUseQuotes(true); + + // Emit ARM Build Attributes + if (Subtarget->isTargetELF()) { + // CPU Type + std::string CPUString = Subtarget->getCPUString(); + if (CPUString != "generic") + O << "\t.cpu " << CPUString << '\n'; + + // FIXME: Emit FPU type + if (Subtarget->hasVFP2()) + O << "\t.eabi_attribute " << ARMBuildAttrs::VFP_arch << ", 2\n"; + + // Signal various FP modes. + if (!UnsafeFPMath) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_denormal << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_exceptions << ", 1\n"; + + if (FiniteOnlyFPMath()) + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 1\n"; + else + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_FP_number_model << ", 3\n"; + + // 8-bytes alignment stuff. + O << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_needed << ", 1\n" + << "\t.eabi_attribute " << ARMBuildAttrs::ABI_align8_preserved << ", 1\n"; + + // FIXME: Should we signal R9 usage? + } + + return Result; +} + +/// PrintUnmangledNameSafely - Print out the printable characters in the name. +/// Don't print things like \\n or \\0. +static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) { + for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen(); + Name != E; ++Name) + if (isprint(*Name)) + OS << *Name; +} + +void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) { + const TargetData *TD = TM.getTargetData(); + + if (!GVar->hasInitializer()) // External global require no code + return; + + // Check to see if this is a special global used by LLVM, if so, emit it. 
+ + if (EmitSpecialLLVMGlobal(GVar)) { + if (Subtarget->isTargetDarwin() && + TM.getRelocationModel() == Reloc::Static) { + if (GVar->getName() == "llvm.global_ctors") + O << ".reference .constructors_used\n"; + else if (GVar->getName() == "llvm.global_dtors") + O << ".reference .destructors_used\n"; + } + return; + } + + std::string name = Mang->getValueName(GVar); + Constant *C = GVar->getInitializer(); + const Type *Type = C->getType(); + unsigned Size = TD->getTypeAllocSize(Type); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + bool isDarwin = Subtarget->isTargetDarwin(); + + printVisibility(name, GVar->getVisibility()); + + if (Subtarget->isTargetELF()) + O << "\t.type " << name << ",%object\n"; + + if (C->isNullValue() && !GVar->hasSection() && !GVar->isThreadLocal() && + !(isDarwin && + TAI->SectionKindForGlobal(GVar) == SectionKind::RODataMergeStr)) { + // FIXME: This seems to be pretty darwin-specific + + if (GVar->hasExternalLinkage()) { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (const char *Directive = TAI->getZeroFillDirective()) { + O << "\t.globl\t" << name << "\n"; + O << Directive << "__DATA, __common, " << name << ", " + << Size << ", " << Align << "\n"; + return; + } + } + + if (GVar->hasLocalLinkage() || GVar->isWeakForLinker()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (isDarwin) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size + << ',' << Align; + } else if (GVar->hasCommonLinkage()) { + O << TAI->getCOMMDirective() << name << "," << Size + << ',' << Align; + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + O << "\t.globl " << name << '\n' + << TAI->getWeakDefDirective() << name << '\n'; + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << ' '; + PrintUnmangledNameSafely(GVar, O); + } + O << '\n'; + EmitGlobalConstant(C); + return; + } + } else if (TAI->getLCOMMDirective() != NULL) { + if (GVar->hasLocalLinkage()) { + O << TAI->getLCOMMDirective() << name << "," << Size; + } else { + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + } else { + SwitchToSection(TAI->SectionForGlobal(GVar)); + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << "\n"; + O << TAI->getCOMMDirective() << name << "," << Size; + if (TAI->getCOMMDirectiveTakesAlignment()) + O << "," << (TAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + } + if (VerboseAsm) { + O << "\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + return; + } + } + + SwitchToSection(TAI->SectionForGlobal(GVar)); + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (isDarwin) { + O << "\t.globl " << name << "\n" + << "\t.weak_definition " << name << "\n"; + } else { + O << "\t.weak " << name << "\n"; + } + break; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. 
+ case GlobalValue::ExternalLinkage: + O << "\t.globl " << name << "\n"; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + break; + } + + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O << "\t\t\t\t" << TAI->getCommentString() << " "; + PrintUnmangledNameSafely(GVar, O); + } + O << "\n"; + if (TAI->hasDotTypeDotSizeDirective()) + O << "\t.size " << name << ", " << Size << "\n"; + + // If the initializer is a extern weak symbol, remember to emit the weak + // reference! + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + if (GV->hasExternalWeakLinkage()) + ExtWeakSymbols.insert(GV); + + EmitGlobalConstant(C); + O << '\n'; +} + + +bool ARMAsmPrinter::doFinalization(Module &M) { + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + printModuleLevelGV(I); + + if (Subtarget->isTargetDarwin()) { + SwitchToDataSection(""); + + // Output stubs for dynamically-linked functions + for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end(); + i != e; ++i) { + if (TM.getRelocationModel() == Reloc::PIC_) + SwitchToTextSection(".section __TEXT,__picsymbolstub4,symbol_stubs," + "none,16", 0); + else + SwitchToTextSection(".section __TEXT,__symbol_stub4,symbol_stubs," + "none,12", 0); + + EmitAlignment(2); + O << "\t.code\t32\n"; + + const char *p = i->getKeyData(); + printSuffixedName(p, "$stub"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\tldr ip, "; + printSuffixedName(p, "$slp"); + O << "\n"; + if (TM.getRelocationModel() == Reloc::PIC_) { + printSuffixedName(p, "$scv"); + O << ":\n"; + O << "\tadd ip, pc, ip\n"; + } + O << "\tldr pc, [ip, #0]\n"; + printSuffixedName(p, "$slp"); + O << ":\n"; + O << "\t.long\t"; + printSuffixedName(p, "$lazy_ptr"); + if (TM.getRelocationModel() == Reloc::PIC_) { + O << "-("; + printSuffixedName(p, "$scv"); + O << "+8)\n"; + } else + O << "\n"; + SwitchToDataSection(".lazy_symbol_pointer", 0); + printSuffixedName(p, "$lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\tdyld_stub_binding_helper\n"; + } + O << "\n"; + + // Output non-lazy-pointers for external and common global variables. + if (!GVNonLazyPtrs.empty()) { + SwitchToDataSection("\t.non_lazy_symbol_pointer", 0); + for (StringSet<>::iterator i = GVNonLazyPtrs.begin(), + e = GVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.indirect_symbol " << p << "\n"; + O << "\t.long\t0\n"; + } + } + + if (!HiddenGVNonLazyPtrs.empty()) { + SwitchToSection(TAI->getDataSection()); + for (StringSet<>::iterator i = HiddenGVNonLazyPtrs.begin(), + e = HiddenGVNonLazyPtrs.end(); i != e; ++i) { + const char *p = i->getKeyData(); + EmitAlignment(2); + printSuffixedName(p, "$non_lazy_ptr"); + O << ":\n"; + O << "\t.long " << p << "\n"; + } + } + + + // Emit initial debug information. + DW->EndModule(); + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + O << "\t.subsections_via_symbols\n"; + } else { + // Emit final debug information for ELF. 
+ DW->EndModule(); + } + + return AsmPrinter::doFinalization(M); +} + +/// createARMCodePrinterPass - Returns a pass that prints the ARM +/// assembly code for a MachineFunction to the given output stream, +/// using the given target machine description. This should work +/// regardless of whether the function is in SSA form. +/// +FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o, + ARMTargetMachine &tm, + CodeGenOpt::Level OptLevel, + bool verbose) { + return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose); +} + +namespace { + static struct Register { + Register() { + ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass); + } + } Registrator; +} diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt new file mode 100644 index 0000000..524a748 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_partially_linked_object(LLVMARMAsmPrinter + ARMAsmPrinter.cpp + ) + +target_name_of_partially_linked_object(LLVMARMCodeGen n) + +add_dependencies(LLVMARMAsmPrinter ${n}) diff --git a/lib/Target/ARM/AsmPrinter/Makefile b/lib/Target/ARM/AsmPrinter/Makefile new file mode 100644 index 0000000..ce36cec --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMAsmPrinter + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt new file mode 100644 index 0000000..2ac40f5 --- /dev/null +++ b/lib/Target/ARM/CMakeLists.txt @@ -0,0 +1,27 @@ +set(LLVM_TARGET_DEFINITIONS ARM.td) + +tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header) +tablegen(ARMGenRegisterNames.inc -gen-register-enums) +tablegen(ARMGenRegisterInfo.inc -gen-register-desc) +tablegen(ARMGenInstrNames.inc -gen-instr-enums) +tablegen(ARMGenInstrInfo.inc -gen-instr-desc) +tablegen(ARMGenCodeEmitter.inc -gen-emitter) +tablegen(ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(ARMGenDAGISel.inc -gen-dag-isel) +tablegen(ARMGenCallingConv.inc -gen-callingconv) +tablegen(ARMGenSubtarget.inc -gen-subtarget) + +add_llvm_target(ARMCodeGen + ARMCodeEmitter.cpp + ARMConstantIslandPass.cpp + ARMConstantPoolValue.cpp + ARMInstrInfo.cpp + ARMISelDAGToDAG.cpp + ARMISelLowering.cpp + ARMJITInfo.cpp + ARMLoadStoreOptimizer.cpp + ARMRegisterInfo.cpp + ARMSubtarget.cpp + ARMTargetAsmInfo.cpp + ARMTargetMachine.cpp + ) diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile new file mode 100644 index 0000000..9a3b9be --- /dev/null +++ b/lib/Target/ARM/Makefile @@ -0,0 +1,23 @@ +##===- lib/Target/ARM/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME = LLVMARMCodeGen +TARGET = ARM + +# Make sure that tblgen is run, first thing. +BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ + ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ + ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ + ARMGenDAGISel.inc ARMGenSubtarget.inc \ + ARMGenCodeEmitter.inc ARMGenCallingConv.inc + +DIRS = AsmPrinter + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt new file mode 100644 index 0000000..4d3200b --- /dev/null +++ b/lib/Target/ARM/README-Thumb.txt @@ -0,0 +1,228 @@ +//===---------------------------------------------------------------------===// +// Random ideas for the ARM backend (Thumb specific). +//===---------------------------------------------------------------------===// + +* Add support for compiling functions in both ARM and Thumb mode, then taking + the smallest. + +* Add support for compiling individual basic blocks in thumb mode, when in a + larger ARM function. This can be used for presumed cold code, like paths + to abort (failure path of asserts), EH handling code, etc. + +* Thumb doesn't have normal pre/post increment addressing modes, but you can + load/store 32-bit integers with pre/postinc by using load/store multiple + instrs with a single register. + +* Make better use of high registers r8, r10, r11, r12 (ip). Some variants of add + and cmp instructions can use high registers. Also, we can use them as + temporaries to spill values into. + +* In thumb mode, short, byte, and bool preferred alignments are currently set + to 4 to accommodate ISA restriction (i.e. add sp, #imm, imm must be multiple + of 4). + +//===---------------------------------------------------------------------===// + +Potential jumptable improvements: + +* If we know function size is less than (1 << 16) * 2 bytes, we can use 16-bit + jumptable entries (e.g. (L1 - L2) >> 1). Or even smaller entries if the + function is even smaller. This also applies to ARM. + +* Thumb jumptable codegen can improve given some help from the assembler. This + is what we generate right now: + + .set PCRELV0, (LJTI1_0_0-(LPCRELL0+4)) +LPCRELL0: + mov r1, #PCRELV0 + add r1, pc + ldr r0, [r0, r1] + cpy pc, r0 + .align 2 +LJTI1_0_0: + .long LBB1_3 + ... + +Note there is another pc relative add that we can take advantage of. 
+ add r1, pc, #imm_8 * 4
+
+We should be able to generate:
+
+LPCRELL0:
+ add r1, LJTI1_0_0
+ ldr r0, [r0, r1]
+ cpy pc, r0
+ .align 2
+LJTI1_0_0:
+ .long LBB1_3
+
+if the assembler can translate the add to:
+ add r1, pc, #((LJTI1_0_0-(LPCRELL0+4))&0xfffffffc)
+
+Note the assembler also does something similar to constpool load:
+LPCRELL0:
+ ldr r0, LCPI1_0
+=>
+ ldr r0, pc, #((LCPI1_0-(LPCRELL0+4))&0xfffffffc)
+
+
+//===----------------------------------------------------------------------===//
+
+We compile the following:
+
+define i16 @func_entry_2E_ce(i32 %i) {
+ switch i32 %i, label %bb12.exitStub [
+ i32 0, label %bb4.exitStub
+ i32 1, label %bb9.exitStub
+ i32 2, label %bb4.exitStub
+ i32 3, label %bb4.exitStub
+ i32 7, label %bb9.exitStub
+ i32 8, label %bb.exitStub
+ i32 9, label %bb9.exitStub
+ ]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
+}
+
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
+gcc compiles to:
+
+ cmp r0, #9
+ @ lr needed for prologue
+ bhi L2
+ ldr r3, L11
+ mov r2, #1
+ mov r1, r2, asl r0
+ ands r0, r3, r2, asl r0
+ movne r0, #2
+ bxne lr
+ tst r1, #13
+ beq L9
+L3:
+ mov r0, r2
+ bx lr
+L9:
+ tst r1, #256
+ movne r0, #3
+ bxne lr
+L2:
+ mov r0, #0
+ bx lr
+L12:
+ .align 2
+L11:
+ .long 642
+
+
+GCC is doing a couple of clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+
+
+//===----------------------------------------------------------------------===//
+
+When spilling in thumb mode and the sp offset is too large to fit in the ldr /
+str offset field, we load the offset from a constpool entry and add it to sp:
+
+ldr r2, LCPI
+add r2, sp
+ldr r2, [r2]
+
+These instructions preserve the condition code which is important if the spill
+is between a cmp and a bcc instruction. However, we can use the (potentially)
+cheaper sequence if we know it's ok to clobber the condition register.
+
+add r2, sp, #255 * 4
+add r2, #132
+ldr r2, [r2, #7 * 4]
+
+This is especially bad when dynamic alloca is used. All the fixed size stack
+objects are referenced off the frame pointer with negative offsets. See
+oggenc for an example.
+
+//===----------------------------------------------------------------------===//
+
+We are reserving R3 as a scratch register under thumb mode. So if it is live in
+to the function, we save / restore R3 to / from R12. Until register scavenging
+is done, we should save R3 to a high callee saved reg at emitPrologue time
+(when hasFP is true or stack size is large) and restore R3 from that register
+instead. This allows us to at least get rid of the save to r12 every time it is
+used.
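+
+A rough sketch of the difference (the high callee-saved register used below,
+r8, is only illustrative; emitPrologue would pick whatever register is
+actually free):
+
+today, repeated around every use of the scratch register:
+ cpy r12, r3 @ save live-in r3
+ @ ... use r3 as a scratch register ...
+ cpy r3, r12 @ restore r3
+
+proposed, once per function when hasFP is true or the stack is large:
+ cpy r8, r3 @ prologue: stash live-in r3 in a callee-saved high reg
+ @ ... r3 usable as a scratch register throughout the body ...
+ cpy r3, r8 @ epilogue: restore r3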
+
+//===---------------------------------------------------------------------===//
+
+Poor codegen test/CodeGen/ARM/select.ll f7:
+
+ ldr r5, LCPI1_0
+LPC0:
+ add r5, pc
+ ldr r6, LCPI1_1
+ ldr r2, LCPI1_2
+ cpy r3, r6
+ cpy lr, pc
+ bx r5
+
+//===---------------------------------------------------------------------===//
+
+Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
+etc. Almost all Thumb instructions clobber condition code.
+
+//===---------------------------------------------------------------------===//
+
+Add ldmia, stmia support.
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
new file mode 100644
index 0000000..068c441e
--- /dev/null
+++ b/lib/Target/ARM/README.txt
@@ -0,0 +1,554 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the ARM backend.
+//===---------------------------------------------------------------------===//
+
+Reimplement 'select' in terms of 'SEL'.
+
+* We would really like to support UXTAB16, but we need to prove that the
+  add doesn't need to overflow between the two 16-bit chunks.
+
+* Implement pre/post increment support. (e.g. PR935)
+* Coalesce stack slots!
+* Implement smarter constant generation for binops with large immediates.
+
+* Consider materializing FP constants like 0.0f and 1.0f using integer
+  immediate instructions then copy to FPU. Slower than load into FPU?
+
+//===---------------------------------------------------------------------===//
+
+Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
+time regalloc happens, these values are now in a 32-bit register, usually with
+the top-bits known to be sign or zero extended. If spilled, we should be able
+to spill these to an 8-bit or 16-bit stack slot, zero or sign extending as part
+of the reload.
+
+Doing this reduces the size of the stack frame (important for thumb etc), and
+also increases the likelihood that we will be able to reload multiple values
+from the stack with a single load.
+
+//===---------------------------------------------------------------------===//
+
+The constant island pass is in good shape. Some cleanups might be desirable,
+but there is unlikely to be much improvement in the generated code.
+
+1. There may be some advantage to trying to be smarter about the initial
+placement, rather than putting everything at the end.
+
+2. There might be some compile-time efficiency to be had by representing
+consecutive islands as a single block rather than multiple blocks.
+
+3. Use a priority queue to sort constant pool users in inverse order of
+   position so we always process the one closest to the end of the function
+   first. This may simplify CreateNewWater.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate copysign custom expansion. We are still generating crappy code with
+default expansion + if-conversion.
+
+//===---------------------------------------------------------------------===//
+
+Eliminate one instruction from:
+
+define i32 @_Z6slow4bii(i32 %x, i32 %y) {
+ %tmp = icmp sgt i32 %x, %y
+ %retval = select i1 %tmp, i32 %x, i32 %y
+ ret i32 %retval
+}
+
+__Z6slow4bii:
+ cmp r0, r1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+=>
+
+__Z6slow4bii:
+ cmp r0, r1
+ movle r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+Implement long long "X-3" with instructions that fold the immediate in. These
+were disabled due to badness with the ARM carry flag on subtracts.
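+
+//===---------------------------------------------------------------------===//
+
+For reference on the previous note, a minimal illustration (not code from the
+backend; the exact register assignment is an assumption) of what folding the
+immediate into a long long subtract should look like. ARM's carry flag after a
+subtract is the inverted borrow, which is the subtlety that got the earlier
+patterns disabled.
+
+long long sub3(long long x) { return x - 3; }
+
+/* desired lowering, assuming the low word of x is in r0 and the high word in r1:
+     subs r0, r0, #3   @ sets carry = NOT borrow
+     sbc  r1, r1, #0   @ consumes the carry for the high word
+*/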
+
+//===---------------------------------------------------------------------===//
+
+We currently compile abs:
+int foo(int p) { return p < 0 ? -p : p; }
+
+into:
+
+_foo:
+ rsb r1, r0, #0
+ cmn r0, #1
+ movgt r1, r0
+ mov r0, r1
+ bx lr
+
+This is very, uh, literal. This could be a 3 operation sequence:
+  t = (p sra 31);
+  res = (p xor t)-t
+
+Which would be better. This occurs in png decode.
+
+//===---------------------------------------------------------------------===//
+
+More load / store optimizations:
+1) Look past instructions without side-effects (not load, store, branch, etc.)
+   when forming the list of loads / stores to optimize.
+
+2) Smarter register allocation?
+We are probably missing some opportunities to use ldm / stm. Consider:
+
+ldr r5, [r0]
+ldr r4, [r0, #4]
+
+This cannot be merged into an ldm. Perhaps we will need to do the transformation
+before register allocation. Then teach the register allocator to allocate a
+chunk of consecutive registers.
+
+3) Better representation for block transfer? This is from Olden/power:
+
+ fldd d0, [r4]
+ fstd d0, [r4, #+32]
+ fldd d0, [r4, #+8]
+ fstd d0, [r4, #+40]
+ fldd d0, [r4, #+16]
+ fstd d0, [r4, #+48]
+ fldd d0, [r4, #+24]
+ fstd d0, [r4, #+56]
+
+If we can spare the registers, it would be better to use fldm and fstm here.
+Need major register allocator enhancement though.
+
+4) Can we recognize the relative position of constantpool entries? i.e. Treat
+
+ ldr r0, LCPI17_3
+ ldr r1, LCPI17_4
+ ldr r2, LCPI17_5
+
+ as
+ ldr r0, LCPI17
+ ldr r1, LCPI17+4
+ ldr r2, LCPI17+8
+
+ Then the ldr's can be combined into a single ldm. See Olden/power.
+
+Note for ARM v4 gcc uses ldmia to load a pair of 32-bit values to represent a
+double 64-bit FP constant:
+
+ adr r0, L6
+ ldmia r0, {r0-r1}
+
+ .align 2
+L6:
+ .long -858993459
+ .long 1074318540
+
+5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
+ldrd/strd instead if there are only two destination registers that form an
+odd/even pair. However, we probably would pay a penalty if the address is not
+aligned on an 8-byte boundary. This requires more information on load / store
+nodes (and MI's?) than we currently carry.
+
+6) struct copies appear to be done field by field
+instead of by words, at least sometimes:
+
+struct foo { int x; short s; char c1; char c2; };
+void cpy(struct foo*a, struct foo*b) { *a = *b; }
+
+llvm code (-O2)
+ ldrb r3, [r1, #+6]
+ ldr r2, [r1]
+ ldrb r12, [r1, #+7]
+ ldrh r1, [r1, #+4]
+ str r2, [r0]
+ strh r1, [r0, #+4]
+ strb r3, [r0, #+6]
+ strb r12, [r0, #+7]
+gcc code (-O2)
+ ldmia r1, {r1-r2}
+ stmia r0, {r1-r2}
+
+In this benchmark, poor handling of aggregate copies has shown up as
+having a large effect on size, and possibly speed as well (we don't have
+a good way to measure on ARM).
+
+//===---------------------------------------------------------------------===//
+
+* Consider this silly example:
+
+double bar(double x) {
+  double r = foo(3.1);
+  return x+r;
+}
+
+_bar:
+ stmfd sp!, {r4, r5, r7, lr}
+ add r7, sp, #8
+ mov r4, r0
+ mov r5, r1
+ fldd d0, LCPI1_0
+ fmrrd r0, r1, d0
+ bl _foo
+ fmdrr d0, r4, r5
+ fmsr s2, r0
+ fsitod d1, s2
+ faddd d0, d1, d0
+ fmrrd r0, r1, d0
+ ldmfd sp!, {r4, r5, r7, pc}
+
+Ignore the prologue and epilogue stuff for a second. Note
+ mov r4, r0
+ mov r5, r1
+the copies to callee-save registers and the fact they are only being used by the
+fmdrr instruction. It would have been better had the fmdrr been scheduled
+before the call and placed the result in a callee-save DPR register. The two
+mov ops would not have been necessary.
+
+//===---------------------------------------------------------------------===//
+
+Calling convention related stuff:
+
+* gcc's parameter passing implementation is terrible and we suffer as a result:
+
+e.g.
+struct s {
+ double d1;
+ int s1;
+};
+
+void foo(struct s S) {
+ printf("%g, %d\n", S.d1, S.s1);
+}
+
+'S' is passed via registers r0, r1, r2. But gcc stores them to the stack, and
+then reloads them to r1, r2, and r3 before issuing the call (r0 contains the
+address of the format string):
+
+ stmfd sp!, {r7, lr}
+ add r7, sp, #0
+ sub sp, sp, #12
+ stmia sp, {r0, r1, r2}
+ ldmia sp, {r1-r2}
+ ldr r0, L5
+ ldr r3, [sp, #8]
+L2:
+ add r0, pc, r0
+ bl L_printf$stub
+
+Instead of an stmia, an ldmia, and an ldr, wouldn't it be better to do three
+moves?
+
+* Returning an aggregate type is even worse:
+
+e.g.
+struct s foo(void) {
+  struct s S = {1.1, 2};
+  return S;
+}
+
+ mov ip, r0
+ ldr r0, L5
+ sub sp, sp, #12
+L2:
+ add r0, pc, r0
+ @ lr needed for prologue
+ ldmia r0, {r0, r1, r2}
+ stmia sp, {r0, r1, r2}
+ stmia ip, {r0, r1, r2}
+ mov r0, ip
+ add sp, sp, #12
+ bx lr
+
+r0 (and later ip) is the hidden parameter from the caller in which to store the
+returned value. The first ldmia loads the constants into r0, r1, r2. The last
+stmia stores r0, r1, r2 into the address passed in. However, there is one
+additional stmia that stores r0, r1, and r2 to some stack location. That store
+is dead.
+
+The llvm-gcc generated code looks like this:
+
+csretcc void %foo(%struct.s* %agg.result) {
+entry:
+ %S = alloca %struct.s, align 4  ; <%struct.s*> [#uses=1]
+ %memtmp = alloca %struct.s  ; <%struct.s*> [#uses=1]
+ cast %struct.s* %S to sbyte*  ; <sbyte*>:0 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %0, sbyte* cast ({ double, int }* %C.0.904 to sbyte*), uint 12, uint 4 )
+ cast %struct.s* %agg.result to sbyte*  ; <sbyte*>:1 [#uses=2]
+ call void %llvm.memcpy.i32( sbyte* %1, sbyte* %0, uint 12, uint 0 )
+ cast %struct.s* %memtmp to sbyte*  ; <sbyte*>:2 [#uses=1]
+ call void %llvm.memcpy.i32( sbyte* %2, sbyte* %1, uint 12, uint 0 )
+ ret void
+}
+
+llc ends up issuing two memcpy's (the first memcpy becomes 3 loads from the
+constantpool). Perhaps we should 1) fix llvm-gcc so the memcpy is translated
+into a number of loads and stores, or 2) custom lower memcpy (of small size) to
+be ldmia / stmia. I think option 2 is better but the current register
+allocator cannot allocate a chunk of registers at a time.
+
+A feasible temporary solution is to use specific physical registers at
+lowering time for small (<= 4 words?) transfer sizes.
+
+* The ARM CSRet calling convention requires the hidden argument to be returned
+by the callee.
+
+//===---------------------------------------------------------------------===//
+
+We can definitely do a better job on BB placements to eliminate some branches.
+It's very common to see llvm-generated assembly code that looks like this:
+
+LBB3:
+ ...
+LBB4:
+...
+ beq LBB3
+ b LBB2
+
+If BB4 is the only predecessor of BB3, then we can emit BB3 after BB4. We can
+then eliminate the beq and turn the unconditional branch to LBB2 into a bne.
+
+See McCat/18-imp/ComputeBoundingBoxes for an example.
+
+//===---------------------------------------------------------------------===//
+
+Register scavenging is now implemented. The example in the previous version
+of this document produces optimal code at -O2.
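+
+//===---------------------------------------------------------------------===//
+
+Relating to the aggregate-return memcpy note above: a sketch (illustrative
+only, names invented) of what option (1) amounts to for the 12-byte struct
+case -- expanding the small memcpy into word-sized loads and stores, which the
+load / store optimizer could later merge into ldmia / stmia.
+
+void copy12(unsigned *dst, const unsigned *src) {
+  /* three word copies instead of a memcpy call; candidates for ldm / stm */
+  dst[0] = src[0];
+  dst[1] = src[1];
+  dst[2] = src[2];
+}
+
+Whether the merge actually happens still depends on the register allocator
+handing back consecutive registers, as noted above.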
+
+//===---------------------------------------------------------------------===//
+
+Pre-/post- indexed load / stores:
+
+1) We should not make the pre/post- indexed load/store transform if the base ptr
+is guaranteed to be live beyond the load/store. This can happen if the base
+ptr is live out of the block in which we are performing the optimization. e.g.
+
+mov r1, r2
+ldr r3, [r1], #4
+...
+
+vs.
+
+ldr r3, [r2]
+add r1, r2, #4
+...
+
+In most cases, this is just a wasted optimization. However, sometimes it can
+negatively impact performance because two-address code is more restrictive
+when it comes to scheduling.
+
+Unfortunately, liveout information is currently unavailable during DAG combine
+time.
+
+2) Consider splitting an indexed load / store into a pair of add/sub + load/store
+   to solve #1 (in TwoAddressInstructionPass.cpp).
+
+3) Enhance LSR to generate more opportunities for indexed ops.
+
+4) Once we have added support for multiple result patterns, write indexed load
+   patterns instead of C++ instruction selection code.
+
+5) Use FLDM / FSTM to emulate indexed FP load / store.
+
+//===---------------------------------------------------------------------===//
+
+We should add i64 support to take advantage of the 64-bit load / stores.
+We can add a pseudo i64 register class containing pseudo registers that are
+register pairs. All other ops (e.g. add, sub) would be expanded as usual.
+
+We need to add pseudo instructions (e.g. gethi / getlo) to extract i32 registers
+from the i64 register. These are single moves which can be eliminated if the
+destination register is a sub-register of the source. We should implement proper
+subreg support in the register allocator to coalesce these away.
+
+There are other minor issues such as multiple instructions for a spill / restore
+/ move.
+
+//===---------------------------------------------------------------------===//
+
+Implement support for some more tricky ways to materialize immediates. For
+example, to get 0xffff8000, we can use:
+
+mov r9, #&3f8000
+sub r9, r9, #&400000
+
+//===---------------------------------------------------------------------===//
+
+We sometimes generate multiple add / sub instructions to update sp in prologue
+and epilogue if the inc / dec value is too large to fit in a single immediate
+operand. In some cases, perhaps it might be better to load the value from a
+constantpool instead.
+
+//===---------------------------------------------------------------------===//
+
+GCC generates significantly better code for this function:
+
+int foo(int StackPtr, unsigned char *Line, unsigned char *Stack, int LineLen) {
+    int i = 0;
+
+    if (StackPtr != 0) {
+       while (StackPtr != 0 && i < (((LineLen) < (32768))? (LineLen) : (32768)))
+          Line[i++] = Stack[--StackPtr];
+        if (LineLen > 32768)
+        {
+            while (StackPtr != 0 && i < LineLen)
+            {
+                i++;
+                --StackPtr;
+            }
+        }
+    }
+    return StackPtr;
+}
+
+//===---------------------------------------------------------------------===//
+
+This should compile to the mlas instruction:
+int mlas(int x, int y, int z) { return ((x * y + z) < 0) ? 7 : 13; }
+
+//===---------------------------------------------------------------------===//
+
+At some point, we should triage these to see if they still apply to us:
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19598
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=18560
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=27016
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11826
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11825
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11824
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11823
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11820
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10982
+
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10242
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9831
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9760
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9759
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9703
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9702
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9663
+
+http://www.inf.u-szeged.hu/gcc-arm/
+http://citeseer.ist.psu.edu/debus04linktime.html
+
+//===---------------------------------------------------------------------===//
+
+gcc generates smaller code for this function at -O2 or -Os:
+
+void foo(signed char* p) {
+  if (*p == 3)
+    bar();
+  else if (*p == 4)
+    baz();
+  else if (*p == 5)
+    quux();
+}
+
+llvm decides it's a good idea to turn the repeated if...else into a
+binary tree, as if it were a switch; the resulting code requires one fewer
+compare-and-branch when *p<=2 or *p==5, the same number when *p==4
+or *p>6, and one more when *p==3. So it should be a speed win
+(on balance). However, the revised code is larger, with 4 conditional
+branches instead of 3.
+
+More seriously, there is a byte->word extend before
+each comparison, where there should be only one, and the condition codes
+are not remembered when the same two values are compared twice.
+
+//===---------------------------------------------------------------------===//
+
+More register scavenging work:
+
+1. Use the register scavenger to track frame indices materialized into registers
+   (those that do not fit in addressing modes) to allow reuse in the same BB.
+2. Finish scavenging for Thumb.
+3. We know some spills and restores are unnecessary. The issue is that once live
+   intervals are merged, they are never split. So every def is spilled
+   and every use requires a restore if the register allocator decides the
+   resulting live interval is not assigned a physical register. It may be
+   possible (with the help of the scavenger) to turn some spill / restore
+   pairs into register copies.
+
+//===---------------------------------------------------------------------===//
+
+More LSR enhancements possible:
+
+1. Teach LSR about pre- and post- indexed ops to allow the iv increment to be
+   merged into a load / store.
+2. Allow iv reuse even when a type conversion is required. For example, i8
+   and i32 load / store addressing modes are identical.
+
+
+//===---------------------------------------------------------------------===//
+
+This:
+
+int foo(int a, int b, int c, int d) {
+  long long acc = (long long)a * (long long)b;
+  acc += (long long)c * (long long)d;
+  return (int)(acc >> 32);
+}
+
+Should compile to use SMLAL (Signed Multiply Accumulate Long), which multiplies
+two signed 32-bit values to produce a 64-bit value, and accumulates this with
+a 64-bit value.
+
+We currently get this with both v4 and v6:
+
+_foo:
+ smull r1, r0, r1, r0
+ smull r3, r2, r3, r2
+ adds r3, r3, r1
+ adc r0, r2, r0
+ bx lr
+
+//===---------------------------------------------------------------------===//
+
+This:
+ #include <algorithm>
+ std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
+ { return std::make_pair(a + b, a + b < a); }
+ bool no_overflow(unsigned a, unsigned b)
+ { return !full_add(a, b).second; }
+
+Should compile to:
+
+_Z8full_addjj:
+ adds r2, r1, r2
+ movcc r1, #0
+ movcs r1, #1
+ str r2, [r0, #0]
+ strb r1, [r0, #4]
+ mov pc, lr
+
+_Z11no_overflowjj:
+ cmn r0, r1
+ movcs r0, #0
+ movcc r0, #1
+ mov pc, lr
+
+not:
+
+__Z8full_addjj:
+ add r3, r2, r1
+ str r3, [r0]
+ mov r2, #1
+ mov r12, #0
+ cmp r3, r1
+ movlo r12, r2
+ str r12, [r0, #+4]
+ bx lr
+__Z11no_overflowjj:
+ add r3, r1, r0
+ mov r2, #1
+ mov r1, #0
+ cmp r3, r0
+ movhs r1, r2
+ mov r0, r1
+ bx lr
+
+//===---------------------------------------------------------------------===//
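+
+A side note on why the desired no_overflow sequence works (illustrative C, not
+generated code; the function name is invented): unsigned a + b wraps exactly
+when the sum is smaller than either operand, and "cmn r0, r1" computes r0 + r1
+purely for the flags, so carry-set is precisely the overflow case. The
+equivalent branch-free source form is:
+
+int no_overflow_ref(unsigned a, unsigned b) { return a + b >= a; }
+
+//===---------------------------------------------------------------------===//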