Update LLVM sources to r73879.

author: ed <ed@FreeBSD.org> 2009-06-22 08:08:12 +0000
committer: ed <ed@FreeBSD.org> 2009-06-22 08:08:12 +0000
commit: a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe (patch)
tree: 86c1bc482baa6c81fc70b8d715153bfa93377186 /lib/Target
parent: db89e312d968c258aba3c79c1c398f5fb19267a3 (diff)
download: FreeBSD-src-a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe.zip
FreeBSD-src-a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe.tar.gz
79 files changed, 2638 insertions, 809 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 594811d..9001e50 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
 // ARM Processors supported.
 //
 
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+include "ARMSchedule.td"
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
 
 // V4 Processors.
-def : Proc<"generic",         []>;
-def : Proc<"arm8",            []>;
-def : Proc<"arm810",          []>;
-def : Proc<"strongarm",       []>;
-def : Proc<"strongarm110",    []>;
-def : Proc<"strongarm1100",   []>;
-def : Proc<"strongarm1110",   []>;
+def : ProcNoItin<"generic",         []>;
+def : ProcNoItin<"arm8",            []>;
+def : ProcNoItin<"arm810",          []>;
+def : ProcNoItin<"strongarm",       []>;
+def : ProcNoItin<"strongarm110",    []>;
+def : ProcNoItin<"strongarm1100",   []>;
+def : ProcNoItin<"strongarm1110",   []>;
 
 // V4T Processors.
-def : Proc<"arm7tdmi",        [ArchV4T]>;
-def : Proc<"arm7tdmi-s",      [ArchV4T]>;
-def : Proc<"arm710t",         [ArchV4T]>;
-def : Proc<"arm720t",         [ArchV4T]>;
-def : Proc<"arm9",            [ArchV4T]>;
-def : Proc<"arm9tdmi",        [ArchV4T]>;
-def : Proc<"arm920",          [ArchV4T]>;
-def : Proc<"arm920t",         [ArchV4T]>;
-def : Proc<"arm922t",         [ArchV4T]>;
-def : Proc<"arm940t",         [ArchV4T]>;
-def : Proc<"ep9312",          [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi",        [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi-s",      [ArchV4T]>;
+def : ProcNoItin<"arm710t",         [ArchV4T]>;
+def : ProcNoItin<"arm720t",         [ArchV4T]>;
+def : ProcNoItin<"arm9",            [ArchV4T]>;
+def : ProcNoItin<"arm9tdmi",        [ArchV4T]>;
+def : ProcNoItin<"arm920",          [ArchV4T]>;
+def : ProcNoItin<"arm920t",         [ArchV4T]>;
+def : ProcNoItin<"arm922t",         [ArchV4T]>;
+def : ProcNoItin<"arm940t",         [ArchV4T]>;
+def : ProcNoItin<"ep9312",          [ArchV4T]>;
 
 // V5T Processors.
-def : Proc<"arm10tdmi",       [ArchV5T]>;
-def : Proc<"arm1020t",        [ArchV5T]>;
+def : ProcNoItin<"arm10tdmi",       [ArchV5T]>;
+def : ProcNoItin<"arm1020t",        [ArchV5T]>;
 
 // V5TE Processors.
-def : Proc<"arm9e",           [ArchV5TE]>;
-def : Proc<"arm926ej-s",      [ArchV5TE]>;
-def : Proc<"arm946e-s",       [ArchV5TE]>;
-def : Proc<"arm966e-s",       [ArchV5TE]>;
-def : Proc<"arm968e-s",       [ArchV5TE]>;
-def : Proc<"arm10e",          [ArchV5TE]>;
-def : Proc<"arm1020e",        [ArchV5TE]>;
-def : Proc<"arm1022e",        [ArchV5TE]>;
-def : Proc<"xscale",          [ArchV5TE]>;
-def : Proc<"iwmmxt",          [ArchV5TE]>;
+def : ProcNoItin<"arm9e",           [ArchV5TE]>;
+def : ProcNoItin<"arm926ej-s",      [ArchV5TE]>;
+def : ProcNoItin<"arm946e-s",       [ArchV5TE]>;
+def : ProcNoItin<"arm966e-s",       [ArchV5TE]>;
+def : ProcNoItin<"arm968e-s",       [ArchV5TE]>;
+def : ProcNoItin<"arm10e",          [ArchV5TE]>;
+def : ProcNoItin<"arm1020e",        [ArchV5TE]>;
+def : ProcNoItin<"arm1022e",        [ArchV5TE]>;
+def : ProcNoItin<"xscale",          [ArchV5TE]>;
+def : ProcNoItin<"iwmmxt",          [ArchV5TE]>;
 
 // V6 Processors.
-def : Proc<"arm1136j-s",      [ArchV6]>;
-def : Proc<"arm1136jf-s",     [ArchV6, FeatureVFP2]>;
-def : Proc<"arm1176jz-s",     [ArchV6]>;
-def : Proc<"arm1176jzf-s",    [ArchV6, FeatureVFP2]>;
-def : Proc<"mpcorenovfp",     [ArchV6]>;
-def : Proc<"mpcore",          [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136j-s",       V6Itineraries,
+                [ArchV6]>;
+def : Processor<"arm1136jf-s",      V6Itineraries,
+                [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jz-s",      V6Itineraries,
+                [ArchV6]>;
+def : Processor<"arm1176jzf-s",     V6Itineraries,
+                [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcorenovfp",      V6Itineraries,
+                [ArchV6]>;
+def : Processor<"mpcore",           V6Itineraries,
+                [ArchV6, FeatureVFP2]>;
 
 // V6T2 Processors.
-def : Proc<"arm1156t2-s",     [ArchV6T2, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s",    [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s",     V6Itineraries,
+                [ArchV6T2, FeatureThumb2]>;
+def : Processor<"arm1156t2f-s",    V6Itineraries,
+                [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
 
 // V7 Processors.
-def : Proc<"cortex-a8",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
-def : Proc<"cortex-a9",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a8",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a9",       [ArchV7A, FeatureThumb2, FeatureNEON]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index f126760..47151e6 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>:
 class CCIfAlign<string Align, CCAction A>:
   CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
 
-/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or
-///                "Soft".
-class CCIfFloatABI<string ABIType, CCAction A>:
-  CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
-
 //===----------------------------------------------------------------------===//
 // ARM APCS Calling Convention
 //===----------------------------------------------------------------------===//
@@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
                                  S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
-
-//===----------------------------------------------------------------------===//
-// ARM Calling Convention Dispatch
-//===----------------------------------------------------------------------===//
-
-def CC_ARM : CallingConv<[
-  CCIfSubtarget<"isAAPCS_ABI()",
-                CCIfSubtarget<"hasVFP2()",
-                              CCIfFloatABI<"Hard",
-                                           CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
-  CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
-  CCDelegateTo<CC_ARM_APCS>
-]>;
-
-def RetCC_ARM : CallingConv<[
-  CCIfSubtarget<"isAAPCS_ABI()",
-                CCIfSubtarget<"hasVFP2()",
-                              CCIfFloatABI<"Hard",
-                                           CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
-  CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
-  CCDelegateTo<RetCC_ARM_APCS>
-]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 44fac12..f6629fe 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI,
   // Set first operand
   Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
 
+  // Skip LDRD and STRD's second operand.
+  if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+    ++OpIdx;
+
   // Set second operand
   if (ImplicitRn)
     // Special handling for implicit use (e.g. PC).
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ca3a9cb..1ed9e80 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -52,8 +52,13 @@ public:
 
   virtual const char *getPassName() const {
     return "ARM Instruction Selection";
-  } 
-  
+  }
+
+ /// getI32Imm - Return a target constant with the specified value, of type i32.
+  inline SDValue getI32Imm(unsigned Imm) {
+    return CurDAG->getTargetConstant(Imm, MVT::i32);
+  }
+
   SDNode *Select(SDValue Op);
   virtual void InstructionSelect();
   bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
@@ -84,6 +89,9 @@ public:
   bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
                              SDValue &OffImm);
 
+  bool SelectShifterOperand(SDValue Op, SDValue N,
+                            SDValue &BaseReg, SDValue &Opc);
+
   bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
   
@@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
   return false;
 }
 
+bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op,
+                                           SDValue N,
+                                           SDValue &BaseReg,
+                                           SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)))
+    ShImmVal = RHS->getZExtValue() & 31;
+  else
+    return false;
+
+  Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+
+  return true;
+}
+
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
-                                              SDValue N, 
+                                              SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &ShReg,
                                               SDValue &Opc) {
@@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
   switch (N->getOpcode()) {
   default: break;
   case ISD::Constant: {
+    // ARMv6T2 and later should materialize imms via MOV / MOVT pair.
+    if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2())
+      break;
+
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
     bool UseCP = true;
     if (Subtarget->isThumb())
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ec8bd1f..2443625 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
   setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
 
+  if (!Subtarget->isThumb()) {
+    // Use branch latency information to determine if-conversion limits.
+    // FIXME: If-converter should use instruction latency of the branch being
+    // eliminated to compute the threshold. For ARMv6, the branch "latency"
+    // varies depending on whether it's dynamically or statically predicted
+    // and on whether the destination is in the prefetch buffer.
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
+    unsigned Latency= InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
+    if (Latency > 1) {
+      setIfCvtBlockSizeLimit(Latency-1);
+      if (Latency > 2)
+        setIfCvtDupBlockSizeLimit(Latency-2);
+    } else {
+      setIfCvtBlockSizeLimit(10);
+      setIfCvtDupBlockSizeLimit(2);
+    }
+  }
+
   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
   // Do not enable CodePlacementOpt for now: it currently runs after the
   // ARMConstantIslandPass and messes up branch relaxation and placement
@@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                         ARM::NoRegister };
 
   unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
-  if (Reg == 0) 
+  if (Reg == 0)
     return false; // we didn't handle it
 
   unsigned i;
@@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                    State);
 }
 
+/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
+                                                 bool Return) const {
+  switch (CC) {
+  default:
+   assert(0 && "Unsupported calling convention");
+  case CallingConv::C:
+  case CallingConv::Fast:
+   // Use target triple & subtarget features to do actual dispatch.
+   if (Subtarget->isAAPCS_ABI()) {
+     if (Subtarget->hasVFP2() &&
+         FloatABIType == FloatABI::Hard)
+       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+     else
+       return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+   } else
+     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  case CallingConv::ARM_AAPCS_VFP:
+   return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+  case CallingConv::ARM_AAPCS:
+   return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+  case CallingConv::ARM_APCS:
+   return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  }
+}
+
 /// LowerCallResult - Lower the result values of an ISD::CALL into the
 /// appropriate copies out of appropriate physical registers.  This assumes that
 /// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
@@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
   SmallVector<CCValAssign, 16> RVLocs;
   bool isVarArg = TheCall->isVarArg();
   CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
-  CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM);
+  CCInfo.AnalyzeCallResult(TheCall,
+                           CCAssignFnForNode(CallingConv, /* Return*/ true));
 
   SmallVector<SDValue, 8> ResultVals;
 
@@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   MVT RetVT           = TheCall->getRetValType(0);
   SDValue Chain       = TheCall->getChain();
   unsigned CC         = TheCall->getCallingConv();
-  assert((CC == CallingConv::C ||
-          CC == CallingConv::Fast) && "unknown calling convention");
   bool isVarArg       = TheCall->isVarArg();
   SDValue Callee      = TheCall->getCallee();
   DebugLoc dl         = TheCall->getDebugLoc();
@@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false));
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
   CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
 
   // Analyze return values of ISD::RET.
-  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+  CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true));
 
   // If this is the first return lowered for this function, add
   // the regs to the liveout set for the function.
@@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
-  CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM);
+  CCInfo.AnalyzeFormalArguments(Op.getNode(),
+                                CCAssignFnForNode(CC, /* Return*/ false));
 
   SmallVector<SDValue, 16> ArgValues;
 
@@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
   MVT VT = Op.getValueType();
   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces())
+  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
     ? ARM::R7 : ARM::R11;
   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   while (Depth--)
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 2dab2db..8f53e39 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -151,6 +151,7 @@ namespace llvm {
     ///
     unsigned ARMPCLabelIndex;
 
+    CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const;
     SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                              const SDValue &StackPtr, const CCValAssign &VA,
                              SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 4b0dbb5..d19fb8e 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
   MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
-  MBB.insert(MI, PopMI);
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
     if (Reg == ARM::LR) {
@@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
         continue;
       Reg = ARM::PC;
       PopMI->setDesc(get(ARM::tPOP_RET));
-      MBB.erase(MI);
+      MI = MBB.erase(MI);
     }
     PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
   }
+
+  // It's illegal to emit pop instruction without operands.
+  if (PopMI->getNumOperands() > 0)
+    MBB.insert(MI, PopMI);
+
   return true;
 }
 
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cc9f1a5..4707e3b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
-def HasV5T   : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE  : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6    : Predicate<"Subtarget->hasV6Ops()">;
-def IsThumb  : Predicate<"Subtarget->isThumb()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM    : Predicate<"!Subtarget->isThumb()">;
+def HasV5T    : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE   : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6     : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb   : Predicate<"Subtarget->isThumb()">;
+def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
+def IsARM     : Predicate<"!Subtarget->isThumb()">;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in
                     LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
                     []>;
 
-let isCall = 1,
+let isCall = 1, Itinerary = IIC_Br,
   Defs = [R0, R1, R2, R3, R12, LR,
           D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
   def BL  : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
@@ -567,7 +567,7 @@ let isCall = 1,
   }
 }
 
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
   // B is "predicable" since it can be xformed into a Bcc.
   let isBarrier = 1 in {
     let isPredicable = 1 in
@@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
 
 let mayLoad = 1 in {
 // Load doubleword
-def LDRD  : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
-                 "ldr", "d $dst, $addr",
-                []>, Requires<[IsARM, HasV5T]>;
+def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
+                "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>;
 
 // Indexed loads
 def LDR_PRE  : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
@@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
 
 // Store doubleword
 let mayStore = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
-               "str", "d $src, $addr",
-               []>, Requires<[IsARM, HasV5T]>;
+def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm,
+               "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>;
 
 // Indexed stores
 def STR_PRE  : AI2stwpr<(outs GPR:$base_wb),
@@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc,
 include "ARMInstrThumb.td"
 
 //===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
 // Floating Point Support
 //
 
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 54232f6..9297f08 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
 //
 
 // Add with carry
+let isCommutable = 1 in
 def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "adc $dst, $rhs",
                [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
@@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
                   "add $dst, $rhs * 4", []>;
 
+let isCommutable = 1 in
 def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "and $dst, $rhs",
                 [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
@@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
 
 // TODO: A7-37: CMP(3) - cmp hi regs
 
+let isCommutable = 1 in
 def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "eor $dst, $rhs",
                [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
@@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2hir", []>;
 } // neverHasSideEffects
 
+let isCommutable = 1 in
 def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "mul $dst, $rhs",
                [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
@@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
               "neg $dst, $src",
               [(set tGPR:$dst, (ineg tGPR:$src))]>;
 
+let isCommutable = 1 in
 def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "orr $dst, $rhs",
                [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 168fb45..07c71da 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -10,3 +10,199 @@
 // This file describes the Thumb2 instruction set.
 //
 //===----------------------------------------------------------------------===//
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>,    // reg imm
+                ComplexPattern<i32, 2, "SelectShifterOperand",
+                               [shl,srl,sra,rotr]> {
+  let PrintMethod = "printSOOperand";
+  let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def LO16 : SDNodeXForm<imm, [{
+  // Transformation function: shift the immediate value down into the low bits.
+  return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+  // Transformation function: shift the immediate value down into the low bits.
+  return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def imm16high : PatLeaf<(i32 imm), [{
+  // Returns true if all bits out of the [31..16] range are 0.
+  return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue());
+}], HI16>;
+
+def imm16high0xffff : PatLeaf<(i32 imm), [{
+  // Returns true if lo 16 bits are set and this is a 32-bit value. 
+  return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL);
+}], HI16>;
+
+def imm0_4095 : PatLeaf<(i32 imm), [{ 
+  return (uint32_t)N->getZExtValue() < 4096; 
+}]>; 
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{ 
+ return (uint32_t)-N->getZExtValue() < 4096; 
+}], imm_neg_XFORM>; 
+
+def imm0_65535 : PatLeaf<(i32 imm), [{ 
+  return N->getZExtValue() < 65536; 
+}]>; 
+
+// A6.3.2 Modified immediate constants in Thumb instructions (#<const>)
+// FIXME: Move it the the addrmode matcher code.
+def t2_so_imm : PatLeaf<(i32 imm), [{
+  uint64_t v = N->getZExtValue();
+  if (v == 0 || v > 0xffffffffUL) return false;
+  // variant1 - 0b0000x - 8-bit which could be zero (not supported for now)
+
+  // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room
+  unsigned hi16 = (unsigned)(v >> 16);
+  unsigned lo16 = (unsigned)(v & 0xffffUL);
+  bool valid = (hi16 == lo16) && (
+    (v & 0x00ff00ffUL) == 0 ||        // type 0001x
+    (v & 0xff00ff00UL) == 0 ||        // type 0010x
+    ((lo16 >> 8) == (lo16 & 0xff)));  // type 0011x
+  if (valid) return true;
+
+  // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room
+  unsigned shift = CountLeadingZeros_32(v);
+  uint64_t mask = (0xff000000ULL >> shift);
+  // If valid, it is type 01000 + shift
+  return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0);
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+//  Thumb-2 to cover the functionality of the ARM instruction set.
+//
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a
+//  binary operation that produces a value.
+multiclass T2I_bin_irs<string opc, PatFrag opnode> {
+   // shifted imm
+   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                       !strconcat(opc, " $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                      Requires<[HasThumb2]>;
+   // register
+   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                       !strconcat(opc, " $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+   // shifted register
+   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                       !strconcat(opc, " $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<string opc, PatFrag opnode> {
+   // shifted imm
+   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                       !strconcat(opc, "s $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                      Requires<[HasThumb2]>;
+
+   // register
+   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                       !strconcat(opc, "s $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+
+   // shifted register
+   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                       !strconcat(opc, "s $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+}
+}
+
+/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the
+/// instruction can optionally set the CPSR register.
+let Uses = [CPSR] in {
+multiclass T2I_bin_c_irs<string opc, PatFrag opnode> {
+   // shifted imm
+   def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s),
+                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                      Requires<[HasThumb2]>;
+
+   // register
+   def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s),
+                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+
+   // shifted register
+   def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
+                       !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+                       [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, 
+                      Requires<[HasThumb2]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+//  Arithmetic Instructions.
+//
+
+//===----------------------------------------------------------------------===//
+//  Move Instructions.
+//
+def tMOVi16  : PseudoInst<(outs GPR:$dst), (ins i32imm:$src),
+                          "movw $dst, $src",
+                          [(set GPR:$dst, imm0_65535:$src)]>, 
+                         Requires<[HasThumb2]>;
+
+let isTwoAddress = 1 in
+def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+                          "movt $dst, $imm",
+                          [(set GPR:$dst, (or (and GPR:$src, 0xffff), 
+                                              imm16high:$imm))]>,
+                         Requires<[HasThumb2]>;
+
+def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2),
+          (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>,
+         Requires<[HasThumb2]>;
+
+def : Pat<(i32 imm:$imm),
+          (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>,
+         Requires<[HasThumb2]>;
+
+//===----------------------------------------------------------------------===//
+//  Arithmetic Instructions.
+//
+defm t2ADD  : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm t2SUB  : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                          "add $dst, $lhs, $rhs", 
+                          [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>,
+                         Requires<[HasThumb2]>; 
+def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), 
+                          "sub $dst, $lhs, $rhs",
+                          [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>,
+                         Requires<[HasThumb2]>;
+
+defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+
+def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), 
+                      "mls $dst, $a, $b, $c", 
+                      [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
+                     Requires<[HasThumb2]>;
+
+def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2),
+                        "orn $dst, $src1, $src2",
+                        [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>,
+                       Requires<[HasThumb2]>;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 684ecb4..59cf125 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,19 +17,22 @@
 #include "ARMAddressingModes.h"
 #include "ARMMachineFunctionInfo.h"
 #include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
@@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated");
 STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
 STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
 
 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
 /// load / store instructions to form ldm / stm instructions.
@@ -82,6 +91,8 @@ namespace {
                       SmallVector<MachineBasicBlock::iterator, 4> &Merges);
 
     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MBBI);
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
   };
@@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
 static int getMemoryOpOffset(const MachineInstr *MI) {
   int Opcode = MI->getOpcode();
   bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
   unsigned NumOperands = MI->getDesc().getNumOperands();
   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
   int Offset = isAM2
-    ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+    ? ARM_AM::getAM2Offset(OffField)
+    : (isAM3 ? ARM_AM::getAM3Offset(OffField)
+             : ARM_AM::getAM5Offset(OffField) * 4);
   if (isAM2) {
     if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
       Offset = -Offset;
+  } else if (isAM3) {
+    if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+      Offset = -Offset;
   } else {
     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
       Offset = -Offset;
@@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
   return Offset;
 }
 
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &MBBI,
+                          int OffImm, bool isDef,
+                          DebugLoc dl, unsigned NewOpc,
+                          unsigned Reg, bool RegDeadKill,
+                          unsigned BaseReg, bool BaseKill,
+                          unsigned OffReg, bool OffKill,
+                          ARMCC::CondCodes Pred, unsigned PredReg,
+                          const TargetInstrInfo *TII) {
+  unsigned Offset;
+  if (OffImm < 0)
+    Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
+  else
+    Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
+  if (isDef)
+    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+      .addReg(BaseReg, getKillRegState(BaseKill))
+      .addReg(OffReg,  getKillRegState(OffKill))
+      .addImm(Offset)
+      .addImm(Pred).addReg(PredReg);
+  else
+    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+      .addReg(Reg, getKillRegState(RegDeadKill))
+      .addReg(BaseReg, getKillRegState(BaseKill))
+      .addReg(OffReg,  getKillRegState(OffKill))
+      .addImm(Offset)
+      .addImm(Pred).addReg(PredReg);
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator &MBBI) {
+  MachineInstr *MI = &*MBBI;
+  unsigned Opcode = MI->getOpcode();
+  if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
+    unsigned EvenReg = MI->getOperand(0).getReg();
+    unsigned OddReg  = MI->getOperand(1).getReg();
+    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
+    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+      return false;
+
+    bool isLd = Opcode == ARM::LDRD;
+    bool EvenDeadKill = isLd ?
+      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+    bool OddDeadKill  = isLd ?
+      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+    const MachineOperand &BaseOp = MI->getOperand(2);
+    unsigned BaseReg = BaseOp.getReg();
+    bool BaseKill = BaseOp.isKill();
+    const MachineOperand &OffOp = MI->getOperand(3);
+    unsigned OffReg = OffOp.getReg();
+    bool OffKill = OffOp.isKill();
+    int OffImm = getMemoryOpOffset(MI);
+    unsigned PredReg = 0;
+    ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+    if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
+      // Ascending register numbers and no offset. It's safe to change it to a
+      // ldm or stm.
+      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
+      if (isLd) {
+        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+          .addReg(BaseReg, getKillRegState(BaseKill))
+          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+          .addImm(Pred).addReg(PredReg)
+          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+          .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+        ++NumLDRD2LDM;
+      } else {
+        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+          .addReg(BaseReg, getKillRegState(BaseKill))
+          .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+          .addImm(Pred).addReg(PredReg)
+          .addReg(EvenReg, getKillRegState(EvenDeadKill))
+          .addReg(OddReg, getKillRegState(OddDeadKill));
+        ++NumSTRD2STM;
+      }
+    } else {
+      // Split into two instructions.
+      unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
+      DebugLoc dl = MBBI->getDebugLoc();
+      // If this is a load and base register is killed, it may have been
+      // re-defed by the load, make sure the first load does not clobber it.
+      if (isLd &&
+          (BaseKill || OffKill) &&
+          (TRI->regsOverlap(EvenReg, BaseReg) ||
+           (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
+        assert(!TRI->regsOverlap(OddReg, BaseReg) &&
+               (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
+        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
+                      BaseReg, false, OffReg, false, Pred, PredReg, TII);
+        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
+                      BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
+      } else {
+        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+                      EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
+                      Pred, PredReg, TII);
+        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+                      OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
+                      Pred, PredReg, TII);
+      }
+      if (isLd)
+        ++NumLDRD2LDR;
+      else
+        ++NumSTRD2STR;
+    }
+
+    MBBI = prior(MBBI);
+    MBB.erase(MI);
+  }
+  return false;
+}
+
 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
 /// ops of the same base and incrementing offset into LDM / STM ops.
 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   RS->enterBasicBlock(&MBB);
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
+    if (FixInvalidRegPairOp(MBB, MBBI))
+      continue;
+
     bool Advance  = false;
     bool TryMerge = false;
     bool Clobber  = false;
@@ -817,8 +951,10 @@ namespace {
     static char ID;
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
 
+    const TargetData *TD;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const ARMSubtarget *STI;
     MachineRegisterInfo *MRI;
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -828,6 +964,11 @@ namespace {
     }
 
   private:
+    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+                          unsigned &NewOpc, unsigned &EvenReg,
+                          unsigned &OddReg, unsigned &BaseReg,
+                          unsigned &OffReg, unsigned &Offset,
+                          unsigned &PredReg, ARMCC::CondCodes &Pred);
     bool RescheduleOps(MachineBasicBlock *MBB,
                        SmallVector<MachineInstr*, 4> &Ops,
                        unsigned Base, bool isLd,
@@ -838,8 +979,10 @@ namespace {
 }
 
 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+  TD  = Fn.getTarget().getTargetData();
   TII = Fn.getTarget().getInstrInfo();
   TRI = Fn.getTarget().getRegisterInfo();
+  STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
   MRI = &Fn.getRegInfo();
 
   bool Modified = false;
@@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   return Modified;
 }
 
-static bool IsSafeToMove(bool isLd, unsigned Base,
-                         MachineBasicBlock::iterator I,
-                         MachineBasicBlock::iterator E,
-                         SmallPtrSet<MachineInstr*, 4> MoveOps,
-                         const TargetRegisterInfo *TRI) {
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+                                      MachineBasicBlock::iterator I,
+                                      MachineBasicBlock::iterator E,
+                                      SmallPtrSet<MachineInstr*, 4> &MemOps,
+                                      SmallSet<unsigned, 4> &MemRegs,
+                                      const TargetRegisterInfo *TRI) {
   // Are there stores / loads / calls between them?
   // FIXME: This is overly conservative. We should make use of alias information
   // some day.
+  SmallSet<unsigned, 4> AddedRegPressure;
   while (++I != E) {
+    if (MemOps.count(&*I))
+      continue;
     const TargetInstrDesc &TID = I->getDesc();
     if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
       return false;
@@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base,
       // str r1, [r0]
       // strh r5, [r0]
       // str r4, [r0, #+4]
-      if (TID.mayStore() && !MoveOps.count(&*I))
+      if (TID.mayStore())
         return false;
     }
     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
       MachineOperand &MO = I->getOperand(j);
-      if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
         return false;
+      if (Reg != Base && !MemRegs.count(Reg))
+        AddedRegPressure.insert(Reg);
     }
   }
+
+  // Estimate register pressure increase due to the transformation.
+  if (MemRegs.size() <= 4)
+    // Ok if we are moving small number of instructions.
+    return true;
+  return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+                                          DebugLoc &dl,
+                                          unsigned &NewOpc, unsigned &EvenReg,
+                                          unsigned &OddReg, unsigned &BaseReg,
+                                          unsigned &OffReg, unsigned &Offset,
+                                          unsigned &PredReg,
+                                          ARMCC::CondCodes &Pred) {
+  // FIXME: FLDS / FSTS -> FLDD / FSTD
+  unsigned Opcode = Op0->getOpcode();
+  if (Opcode == ARM::LDR)
+    NewOpc = ARM::LDRD;
+  else if (Opcode == ARM::STR)
+    NewOpc = ARM::STRD;
+  else
+    return 0;
+
+  // Must sure the base address satisfies i64 ld / st alignment requirement.
+  if (!Op0->hasOneMemOperand() ||
+      !Op0->memoperands_begin()->getValue() ||
+      Op0->memoperands_begin()->isVolatile())
+    return false;
+
+  unsigned Align = Op0->memoperands_begin()->getAlignment();
+  unsigned ReqAlign = STI->hasV6Ops()
+    ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
+  if (Align < ReqAlign)
+    return false;
+
+  // Then make sure the immediate offset fits.
+  int OffImm = getMemoryOpOffset(Op0);
+  ARM_AM::AddrOpc AddSub = ARM_AM::add;
+  if (OffImm < 0) {
+    AddSub = ARM_AM::sub;
+    OffImm = - OffImm;
+  }
+  if (OffImm >= 256) // 8 bits
+    return false;
+  Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+
+  EvenReg = Op0->getOperand(0).getReg();
+  OddReg  = Op1->getOperand(0).getReg();
+  if (EvenReg == OddReg)
+    return false;
+  BaseReg = Op0->getOperand(1).getReg();
+  OffReg = Op0->getOperand(2).getReg();
+  Pred = getInstrPredicate(Op0, PredReg);
+  dl = Op0->getDebugLoc();
   return true;
 }
 
@@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
     MachineInstr *FirstOp = 0;
     MachineInstr *LastOp = 0;
     int LastOffset = 0;
+    unsigned LastOpcode = 0;
     unsigned LastBytes = 0;
     unsigned NumMove = 0;
     for (int i = Ops.size() - 1; i >= 0; --i) {
@@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
         LastOp = Op;
       }
 
+      unsigned Opcode = Op->getOpcode();
+      if (LastOpcode && Opcode != LastOpcode)
+        break;
+
       int Offset = getMemoryOpOffset(Op);
       unsigned Bytes = getLSMultipleTransferSize(Op);
       if (LastBytes) {
@@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
       }
       LastOffset = Offset;
       LastBytes = Bytes;
-      if (++NumMove == 4)
+      LastOpcode = Opcode;
+      if (++NumMove == 8) // FIXME: Tune
         break;
     }
 
     if (NumMove <= 1)
       Ops.pop_back();
     else {
-      SmallPtrSet<MachineInstr*, 4> MoveOps;
-      for (int i = NumMove-1; i >= 0; --i)
-        MoveOps.insert(Ops[i]);
+      SmallPtrSet<MachineInstr*, 4> MemOps;
+      SmallSet<unsigned, 4> MemRegs;
+      for (int i = NumMove-1; i >= 0; --i) {
+        MemOps.insert(Ops[i]);
+        MemRegs.insert(Ops[i]->getOperand(0).getReg());
+      }
 
       // Be conservative, if the instructions are too far apart, don't
       // move them. We want to limit the increase of register pressure.
-      bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
       if (DoMove)
-        DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+                                           MemOps, MemRegs, TRI);
       if (!DoMove) {
         for (unsigned i = 0; i != NumMove; ++i)
           Ops.pop_back();
       } else {
         // This is the new location for the loads / stores.
         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
-        while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+        while (InsertPos != MBB->end() && MemOps.count(InsertPos))
           ++InsertPos;
-        for (unsigned i = 0; i != NumMove; ++i) {
-          MachineInstr *Op = Ops.back();
+
+        // If we are moving a pair of loads / stores, see if it makes sense
+        // to try to allocate a pair of registers that can form register pairs.
+        MachineInstr *Op0 = Ops.back();
+        MachineInstr *Op1 = Ops[Ops.size()-2];
+        unsigned EvenReg = 0, OddReg = 0;
+        unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
+        ARMCC::CondCodes Pred = ARMCC::AL;
+        unsigned NewOpc = 0;
+        unsigned Offset = 0;
+        DebugLoc dl;
+        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+                                             EvenReg, OddReg, BaseReg, OffReg,
+                                             Offset, PredReg, Pred)) {
+          Ops.pop_back();
           Ops.pop_back();
-          MBB->splice(InsertPos, MBB, Op);
+
+          // Form the pair instruction.
+          if (isLd) {
+            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+              .addReg(EvenReg, RegState::Define)
+              .addReg(OddReg, RegState::Define)
+              .addReg(BaseReg).addReg(0).addImm(Offset)
+              .addImm(Pred).addReg(PredReg);
+            ++NumLDRDFormed;
+          } else {
+            BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+              .addReg(EvenReg)
+              .addReg(OddReg)
+              .addReg(BaseReg).addReg(0).addImm(Offset)
+              .addImm(Pred).addReg(PredReg);
+            ++NumSTRDFormed;
+          }
+          MBB->erase(Op0);
+          MBB->erase(Op1);
+
+          // Add register allocation hints to form register pairs.
+          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
+        } else {
+          for (unsigned i = 0; i != NumMove; ++i) {
+            MachineInstr *Op = Ops.back();
+            Ops.pop_back();
+            MBB->splice(InsertPos, MBB, Op);
+          }
         }
 
         NumLdStMoved += NumMove;
@@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
       }
 
       if (StopHere) {
-        // Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
+        // Found a duplicate (a base+offset combination that's seen earlier).
+        // Backtrack.
         --Loc;
         break;
       }
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 199858f..bbc1300 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
                                  const ARMSubtarget &sti)
   : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     TII(tii), STI(sti),
-    FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+    FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
 }
 
 static inline
@@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
       .addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
 }
 
-const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
-  return &ARM::GPRRegClass;
-}
-
 /// isLowRegister - Returns true if the register is low register r0-r7.
 ///
 bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
@@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
   return false;
 }
 
+const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+  return &ARM::GPRRegClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+                                    unsigned HintType, unsigned HintReg,
+                                    const MachineFunction &MF) const {
+  // Alternative register allocation orders when favoring even / odd registers
+  // of register pairs.
+
+  // No FP, R9 is available.
+  static const unsigned GPREven1[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd1[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R7, R9 is available.
+  static const unsigned GPREven2[] = {
+    ARM::R0, ARM::R2, ARM::R4,          ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd2[] = {
+    ARM::R1, ARM::R3, ARM::R5,          ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R11, R9 is available.
+  static const unsigned GPREven3[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9
+  };
+  static const unsigned GPROdd3[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+    ARM::R8
+  };
+
+  // No FP, R9 is not available.
+  static const unsigned GPREven4[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6,          ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd4[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7,          ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R7, R9 is not available.
+  static const unsigned GPREven5[] = {
+    ARM::R0, ARM::R2, ARM::R4,                   ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd5[] = {
+    ARM::R1, ARM::R3, ARM::R5,                   ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R11, R9 is not available.
+  static const unsigned GPREven6[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+  };
+  static const unsigned GPROdd6[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+  };
+
+
+  if (HintType == ARMRI::RegPairEven) {
+    if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+      // It's no longer possible to fulfill this hint. Return the default
+      // allocation order.
+      return std::make_pair(RC->allocation_order_begin(MF),
+                            RC->allocation_order_end(MF));
+
+    if (!STI.isTargetDarwin() && !hasFP(MF)) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven1,
+                              GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven4,
+                              GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+    } else if (FramePtr == ARM::R7) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven2,
+                              GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven5,
+                              GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+    } else { // FramePtr == ARM::R11
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven3,
+                              GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven6,
+                              GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+    }
+  } else if (HintType == ARMRI::RegPairOdd) {
+    if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+      // It's no longer possible to fulfill this hint. Return the default
+      // allocation order.
+      return std::make_pair(RC->allocation_order_begin(MF),
+                            RC->allocation_order_end(MF));
+
+    if (!STI.isTargetDarwin() && !hasFP(MF)) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd1,
+                              GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd4,
+                              GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+    } else if (FramePtr == ARM::R7) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd2,
+                              GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd5,
+                              GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+    } else { // FramePtr == ARM::R11
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd3,
+                              GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd6,
+                              GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+    }
+  }
+  return std::make_pair(RC->allocation_order_begin(MF),
+                        RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                                     const MachineFunction &MF) const {
+  if (Reg == 0 || !isPhysicalRegister(Reg))
+    return 0;
+  if (Type == 0)
+    return Reg;
+  else if (Type == (unsigned)ARMRI::RegPairOdd)
+    // Odd register.
+    return getRegisterPairOdd(Reg, MF);
+  else if (Type == (unsigned)ARMRI::RegPairEven)
+    // Even register.
+    return getRegisterPairEven(Reg, MF);
+  return 0;
+}
+
+void
+ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                                    MachineFunction &MF) const {
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+  if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+       Hint.first == (unsigned)ARMRI::RegPairEven) &&
+      Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+    // If 'Reg' is one of the even / odd register pair and it's now changed
+    // (e.g. coalesced) into a different register. The other register of the
+    // pair allocation hint must be updated to reflect the relationship
+    // change.
+    unsigned OtherReg = Hint.second;
+    Hint = MRI->getRegAllocationHint(OtherReg);
+    if (Hint.second == Reg)
+      // Make sure the pair has not already divorced.
+      MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+  }
+}
+
 bool
 ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
   const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const {
 
 unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
   if (STI.isTargetDarwin() || hasFP(MF))
-    return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
-  else
-    return ARM::SP;
+    return FramePtr;
+  return ARM::SP;
 }
 
 unsigned ARMRegisterInfo::getEHExceptionRegister() const {
@@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
 }
 
+unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg,
+                                              const MachineFunction &MF) const {
+  switch (Reg) {
+  default: break;
+  // Return 0 if either register of the pair is a special register.
+  // So no R12, etc.
+  case ARM::R1:
+    return ARM::R0;
+  case ARM::R3:
+    // FIXME!
+    return STI.isThumb() ? 0 : ARM::R2;
+  case ARM::R5:
+    return ARM::R4;
+  case ARM::R7:
+    return isReservedReg(MF, ARM::R7)  ? 0 : ARM::R6;
+  case ARM::R9:
+    return isReservedReg(MF, ARM::R9)  ? 0 :ARM::R8;
+  case ARM::R11:
+    return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+  case ARM::S1:
+    return ARM::S0;
+  case ARM::S3:
+    return ARM::S2;
+  case ARM::S5:
+    return ARM::S4;
+  case ARM::S7:
+    return ARM::S6;
+  case ARM::S9:
+    return ARM::S8;
+  case ARM::S11:
+    return ARM::S10;
+  case ARM::S13:
+    return ARM::S12;
+  case ARM::S15:
+    return ARM::S14;
+  case ARM::S17:
+    return ARM::S16;
+  case ARM::S19:
+    return ARM::S18;
+  case ARM::S21:
+    return ARM::S20;
+  case ARM::S23:
+    return ARM::S22;
+  case ARM::S25:
+    return ARM::S24;
+  case ARM::S27:
+    return ARM::S26;
+  case ARM::S29:
+    return ARM::S28;
+  case ARM::S31:
+    return ARM::S30;
+
+  case ARM::D1:
+    return ARM::D0;
+  case ARM::D3:
+    return ARM::D2;
+  case ARM::D5:
+    return ARM::D4;
+  case ARM::D7:
+    return ARM::D6;
+  case ARM::D9:
+    return ARM::D8;
+  case ARM::D11:
+    return ARM::D10;
+  case ARM::D13:
+    return ARM::D12;
+  case ARM::D15:
+    return ARM::D14;
+  }
+
+  return 0;
+}
+
+unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg,
+                                             const MachineFunction &MF) const {
+  switch (Reg) {
+  default: break;
+  // Return 0 if either register of the pair is a special register.
+  // So no R12, etc.
+  case ARM::R0:
+    return ARM::R1;
+  case ARM::R2:
+    // FIXME!
+    return STI.isThumb() ? 0 : ARM::R3;
+  case ARM::R4:
+    return ARM::R5;
+  case ARM::R6:
+    return isReservedReg(MF, ARM::R7)  ? 0 : ARM::R7;
+  case ARM::R8:
+    return isReservedReg(MF, ARM::R9)  ? 0 :ARM::R9;
+  case ARM::R10:
+    return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+  case ARM::S0:
+    return ARM::S1;
+  case ARM::S2:
+    return ARM::S3;
+  case ARM::S4:
+    return ARM::S5;
+  case ARM::S6:
+    return ARM::S7;
+  case ARM::S8:
+    return ARM::S9;
+  case ARM::S10:
+    return ARM::S11;
+  case ARM::S12:
+    return ARM::S13;
+  case ARM::S14:
+    return ARM::S15;
+  case ARM::S16:
+    return ARM::S17;
+  case ARM::S18:
+    return ARM::S19;
+  case ARM::S20:
+    return ARM::S21;
+  case ARM::S22:
+    return ARM::S23;
+  case ARM::S24:
+    return ARM::S25;
+  case ARM::S26:
+    return ARM::S27;
+  case ARM::S28:
+    return ARM::S29;
+  case ARM::S30:
+    return ARM::S31;
+
+  case ARM::D0:
+    return ARM::D1;
+  case ARM::D2:
+    return ARM::D3;
+  case ARM::D4:
+    return ARM::D5;
+  case ARM::D6:
+    return ARM::D7;
+  case ARM::D8:
+    return ARM::D9;
+  case ARM::D10:
+    return ARM::D11;
+  case ARM::D12:
+    return ARM::D13;
+  case ARM::D14:
+    return ARM::D15;
+  }
+
+  return 0;
+}
+
 #include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e1d9efb..e8f4fd8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -22,12 +22,17 @@ namespace llvm {
   class TargetInstrInfo;
   class Type;
 
+/// Register allocation hints.
+namespace ARMRI {
+  enum {
+    RegPairOdd  = 1,
+    RegPairEven = 2
+  };
+}
+
 struct ARMRegisterInfo : public ARMGenRegisterInfo {
   const TargetInstrInfo &TII;
   const ARMSubtarget &STI;
-private:
-  /// FramePtr - ARM physical register used as frame ptr.
-  unsigned FramePtr;
 
 public:
   ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
@@ -49,10 +54,6 @@ public:
   /// if the register is a single precision VFP register.
   static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
 
-  /// getPointerRegClass - Return the register class to use to hold pointers.
-  /// This is used for addressing modes.
-  const TargetRegisterClass *getPointerRegClass() const;
-
   /// Code Generation virtual methods...
   const TargetRegisterClass *
     getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
@@ -65,6 +66,19 @@ public:
 
   bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
 
+  const TargetRegisterClass *getPointerRegClass() const;
+
+  std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+  getAllocationOrder(const TargetRegisterClass *RC,
+                     unsigned HintType, unsigned HintReg,
+                     const MachineFunction &MF) const;
+
+  unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                               const MachineFunction &MF) const;
+
+  void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                          MachineFunction &MF) const;
+
   bool requiresRegisterScavenging(const MachineFunction &MF) const;
 
   bool hasFP(const MachineFunction &MF) const;
@@ -95,6 +109,15 @@ public:
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
   
   bool isLowRegister(unsigned Reg) const;
+
+private:
+  /// FramePtr - ARM physical register used as frame ptr.
+  unsigned FramePtr;
+
+  unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+  unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index ebe7d58..d864079 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
     GPRClass::allocation_order_begin(const MachineFunction &MF) const {
       const TargetMachine &TM = MF.getTarget();
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-      if (Subtarget.useThumbBacktraces()) {
+      if (Subtarget.isTargetDarwin()) {
         if (Subtarget.isR9Reserved())
           return ARM_GPR_AO_4;
         else
@@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
       const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
       GPRClass::iterator I;
 
-      if (Subtarget.useThumbBacktraces()) {
+      if (Subtarget.isTargetDarwin()) {
         if (Subtarget.isR9Reserved()) {
           I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
         } else {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 0000000..75fa707
--- /dev/null
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,35 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across ARM processors
+//
+def FU_iALU   : FuncUnit; // Integer alu unit
+def FU_iLdSt  : FuncUnit; // Integer load / store unit
+def FU_FpALU  : FuncUnit; // FP alu unit
+def FU_FpLdSt : FuncUnit; // FP load / store unit
+def FU_Br     : FuncUnit; // Branch unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALU    : InstrItinClass;
+def IIC_iLoad   : InstrItinClass;
+def IIC_iStore  : InstrItinClass;
+def IIC_fpALU   : InstrItinClass;
+def IIC_fpLoad  : InstrItinClass;
+def IIC_fpStore : InstrItinClass;
+def IIC_Br      : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[]>;
+
+include "ARMScheduleV6.td"
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 0000000..596a57f
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,22 @@
+//===- ARMSchedule.td - ARM v6 Scheduling Definitions ------*- tablegen -*-===//
+// 
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// 
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+def V6Itineraries : ProcessorItineraries<[
+  InstrItinData<IIC_iALU    , [InstrStage<1, [FU_iALU]>]>,
+  InstrItinData<IIC_iLoad   , [InstrStage<2, [FU_iLdSt]>]>,
+  InstrItinData<IIC_iStore  , [InstrStage<1, [FU_iLdSt]>]>,
+  InstrItinData<IIC_fpALU   , [InstrStage<6, [FU_FpALU]>]>,
+  InstrItinData<IIC_fpLoad  , [InstrStage<2, [FU_FpLdSt]>]>,
+  InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>,
+  InstrItinData<IIC_Br      , [InstrStage<3, [FU_Br]>]>
+]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index a978380..7ac7b49 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
   , ARMFPUType(None)
   , IsThumb(isThumb)
   , ThumbMode(Thumb1)
-  , UseThumbBacktraces(false)
   , IsR9Reserved(false)
   , stackAlignment(4)
   , CPUString("generic")
@@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
   if (isAAPCS_ABI())
     stackAlignment = 8;
 
-  if (isTargetDarwin()) {
-    UseThumbBacktraces = true;
+  if (isTargetDarwin())
     IsR9Reserved = true;
-  }
 }
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0704055..c3cc7ff 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,6 +14,7 @@
 #ifndef ARMSUBTARGET_H
 #define ARMSUBTARGET_H
 
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetSubtarget.h"
 #include <string>
 
@@ -48,9 +49,6 @@ protected:
   /// ThumbMode - Indicates supported Thumb version.
   ThumbTypeEnum ThumbMode;
 
-  /// UseThumbBacktraces - True if we use thumb style backtraces.
-  bool UseThumbBacktraces;
-
   /// IsR9Reserved - True if R9 is a not available as general purpose register.
   bool IsR9Reserved;
 
@@ -61,6 +59,9 @@ protected:
   /// CPUString - String name of used CPU.
   std::string CPUString;
 
+  /// Selected instruction itineraries (one entry per itinerary class.)
+  InstrItineraryData InstrItins;
+  
  public:
   enum {
     isELF, isDarwin
@@ -106,14 +107,17 @@ protected:
   bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
 
   bool isThumb() const { return IsThumb; }
-  bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
-  bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+  bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
+  bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
 
-  bool useThumbBacktraces() const { return UseThumbBacktraces; }
   bool isR9Reserved() const { return IsR9Reserved; }
 
   const std::string & getCPUString() const { return CPUString; }
 
+  /// getInstrItins - Return the instruction itineraies based on subtarget 
+  /// selection.
+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
   /// getStackAlignment - Returns the minimum alignment known to hold of the
   /// stack frame on entry to the function and which must be maintained by every
   /// function for this subtarget.
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index 4107dcc..42b8eae 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -17,80 +17,42 @@
 #include <cctype>
 using namespace llvm;
 
-
 const char *const llvm::arm_asm_table[] = {
-                                      "{r0}", "r0",
-                                      "{r1}", "r1",
-                                      "{r2}", "r2",
-                                      "{r3}", "r3",
-                                      "{r4}", "r4",
-                                      "{r5}", "r5",
-                                      "{r6}", "r6",
-                                      "{r7}", "r7",
-                                      "{r8}", "r8",
-                                      "{r9}", "r9",
-                                      "{r10}", "r10",
-                                      "{r11}", "r11",
-                                      "{r12}", "r12",
-                                      "{r13}", "r13",
-                                      "{r14}", "r14",
-                                      "{lr}", "lr",
-                                      "{sp}", "sp",
-                                      "{ip}", "ip",
-                                      "{fp}", "fp",
-                                      "{sl}", "sl",
-                                      "{memory}", "memory",
-                                      "{cc}", "cc",
-                                      0,0};
+  "{r0}", "r0",
+  "{r1}", "r1",
+  "{r2}", "r2",
+  "{r3}", "r3",
+  "{r4}", "r4",
+  "{r5}", "r5",
+  "{r6}", "r6",
+  "{r7}", "r7",
+  "{r8}", "r8",
+  "{r9}", "r9",
+  "{r10}", "r10",
+  "{r11}", "r11",
+  "{r12}", "r12",
+  "{r13}", "r13",
+  "{r14}", "r14",
+  "{lr}", "lr",
+  "{sp}", "sp",
+  "{ip}", "ip",
+  "{fp}", "fp",
+  "{sl}", "sl",
+  "{memory}", "memory",
+  "{cc}", "cc",
+  0,0
+};
 
 ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
   ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
-  GlobalPrefix = "_";
-  PrivateGlobalPrefix = "L";
-  LessPrivateGlobalPrefix = "l";
-  StringConstantPrefix = "\1LC";
-  BSSSection = 0;                       // no BSS section
   ZeroDirective = "\t.space\t";
   ZeroFillDirective = "\t.zerofill\t";  // Uses .zerofill
   SetDirective = "\t.set\t";
-  WeakRefDirective = "\t.weak_reference\t";
-  WeakDefDirective = "\t.weak_definition ";
-  HiddenDirective = "\t.private_extern\t";
   ProtectedDirective = NULL;
-  JumpTableDataSection = ".const";
-  CStringSection = "\t.cstring";
   HasDotTypeDotSizeDirective = false;
-  HasSingleParameterDotFile = false;
-  NeedsIndirectEncoding = true;
-  if (TM.getRelocationModel() == Reloc::Static) {
-    StaticCtorsSection = ".constructor";
-    StaticDtorsSection = ".destructor";
-  } else {
-    StaticCtorsSection = ".mod_init_func";
-    StaticDtorsSection = ".mod_term_func";
-  }
-
-  // In non-PIC modes, emit a special label before jump tables so that the
-  // linker can perform more accurate dead code stripping.
-  if (TM.getRelocationModel() != Reloc::PIC_) {
-    // Emit a local label that is preserved until the linker runs.
-    JumpTableSpecialLabelPrefix = "l";
-  }
-
-  NeedsSet = true;
-  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
-  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
-  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
-  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
-  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
-  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
-  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
-  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
-  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
-  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
-  DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+  SupportsDebugInformation = true;
 }
 
 ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
@@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
   DwarfLocSection =     "\t.section\t.debug_loc,\"\",%progbits";
   DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
   DwarfRangesSection =  "\t.section\t.debug_ranges,\"\",%progbits";
-  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+  DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
 
   if (Subtarget->isAAPCS_ABI()) {
     StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
@@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
     StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
     StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
   }
+  SupportsDebugInformation = true;
 }
 
 /// Count the number of comma-separated arguments.
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
index 9e6f856..683692f 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.h
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -26,8 +26,7 @@ namespace llvm {
 
   template <class BaseTAI>
   struct ARMTargetAsmInfo : public BaseTAI {
-    explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
-      BaseTAI(TM) {
+    explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) {
       BaseTAI::AsmTransCBE = arm_asm_table;
 
       BaseTAI::AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7033907..8006b9b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,9 +23,6 @@
 #include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
-static cl::opt<bool>
-EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
-                  cl::desc("Enable pre-regalloc load store optimization pass"));
 static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
                               cl::desc("Disable load store optimization pass"));
 static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0;
 static RegisterTarget<ARMTargetMachine>   X("arm",   "ARM");
 static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeARMTarget() { }
+}
+
 // No assembler printer by default
 ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
 
@@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
     InstrInfo(Subtarget),
     FrameInfo(Subtarget),
     JITInfo(),
-    TLInfo(*this) {
+    TLInfo(*this),
+    InstrItins(Subtarget.getInstrItineraryData()) {
   DefRelocModel = getRelocationModel();
 }
 
@@ -149,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
 
 bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
-  if (!EnablePreLdStOpti)
-    return false;
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass(true));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7192c1b..c4c8e6c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -28,13 +28,14 @@ namespace llvm {
 class Module;
 
 class ARMTargetMachine : public LLVMTargetMachine {
-  ARMSubtarget      Subtarget;
-  const TargetData  DataLayout;       // Calculates type size & alignment
-  ARMInstrInfo      InstrInfo;
-  ARMFrameInfo      FrameInfo;
-  ARMJITInfo        JITInfo;
-  ARMTargetLowering TLInfo;
-  Reloc::Model      DefRelocModel;    // Reloc model before it's overridden.
+  ARMSubtarget        Subtarget;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMInstrInfo        InstrInfo;
+  ARMFrameInfo        FrameInfo;
+  ARMJITInfo          JITInfo;
+  ARMTargetLowering   TLInfo;
+  InstrItineraryData  InstrItins;
+  Reloc::Model        DefRelocModel;    // Reloc model before it's overridden.
 
 protected:
   // To avoid having target depend on the asmprinter stuff libraries, asmprinter
@@ -59,6 +60,9 @@ public:
   virtual       ARMTargetLowering *getTargetLowering() const {
     return const_cast<ARMTargetLowering*>(&TLInfo);
   }
+  virtual const InstrItineraryData getInstrItineraryData() const {  
+    return InstrItins;
+  }
 
   static void registerAsmPrinter(AsmPrinterCtorFn F) {
     AsmPrinterCtor = F;
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d908cf4..948a100 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
 namespace {
   class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
     DwarfWriter *DW;
-    MachineModuleInfo *MMI;
 
     /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     /// make the right decision when printing asm code for different targets.
@@ -84,7 +83,7 @@ namespace {
     explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
                            const TargetAsmInfo *T, CodeGenOpt::Level OL,
                            bool V)
-      : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+      : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL),
         InCPMode(false) {
       Subtarget = &TM.getSubtarget<ARMSubtarget>();
     }
@@ -97,6 +96,7 @@ namespace {
                       const char *Modifier = 0);
     void printSOImmOperand(const MachineInstr *MI, int opNum);
     void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+    void printSOOperand(const MachineInstr *MI, int OpNum);
     void printSORegOperand(const MachineInstr *MI, int opNum);
     void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
     void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
@@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
   printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
 }
 
+// Constant shifts so_reg is a 3-operand unit corresponding to register forms of
+// the A5.1 "Addressing Mode 1 - Data-processing operands" forms.  This
+// includes:
+// REG 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+  unsigned Reg = MO1.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  O << TM.getRegisterInfo()->getAsmName(Reg);
+
+  // Print the shift opc.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
 // so_reg is a 4-operand unit corresponding to register forms of the A5.1
 // "Addressing Mode 1 - Data-processing operands" forms.  This includes:
 //    REG 0   0    - e.g. R5
@@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
 bool ARMAsmPrinter::doInitialization(Module &M) {
 
   bool Result = AsmPrinter::doInitialization(M);
-
-  // Emit initial debug information.
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  assert(MMI);
   DW = getAnalysisIfAvailable<DwarfWriter>();
-  assert(DW && "Dwarf Writer is not available");
-  DW->BeginModule(&M, MMI, O, this, TAI);
 
-  // Darwin wants symbols to be quoted if they have complex names.
-  if (Subtarget->isTargetDarwin())
-    Mang->setUseQuotes(true);
+  // Thumb-2 instructions are supported only in unified assembler syntax mode.
+  if (Subtarget->hasThumb2())
+    O << "\t.syntax unified\n";
 
   // Emit ARM Build Attributes
   if (Subtarget->isTargetELF()) {
@@ -1115,3 +1131,9 @@ namespace {
     }
   } Registrator;
 }
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeARMAsmPrinter() { }
+}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 7ed8ef6..1be1713 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -39,7 +39,7 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
 
 AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
   // Set up the TargetLowering object.
-  //I am having problems with shr n ubyte 1
+  //I am having problems with shr n i8 1
   setShiftAmountType(MVT::i64);
   setBooleanContents(ZeroOrOneBooleanContent);
   
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 4c83054..cdd4fa4 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -21,16 +21,17 @@
 
 using namespace llvm;
 
-/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library.  In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this.  Though it is unused, do not remove it.
-extern "C" int AlphaTargetMachineModule;
-int AlphaTargetMachineModule = 0;
-
 // Register the targets
 static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
 
+// No assembler printer by default
+AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeAlphaTarget() { }
+}
+
 const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
   return new AlphaTargetAsmInfo(*this);
 }
@@ -92,23 +93,32 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                             bool Verbose,
                                             raw_ostream &Out) {
   PM.add(createAlphaLLRPPass(*this));
-  PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose));
+  // Output assembly language.
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
   return false;
 }
 bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel,
                                         bool DumpAsm, MachineCodeEmitter &MCE) {
   PM.add(createAlphaCodeEmitterPass(*this, MCE));
-  if (DumpAsm)
-    PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
   return false;
 }
 bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel,
                                         bool DumpAsm, JITCodeEmitter &JCE) {
   PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
-  if (DumpAsm)
-    PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+  if (DumpAsm) {
+    assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+    if (AsmPrinterCtor)
+      PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+  }
   return false;
 }
 bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 51224e8..946ca55 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -33,10 +33,18 @@ class AlphaTargetMachine : public LLVMTargetMachine {
   AlphaJITInfo JITInfo;
   AlphaSubtarget Subtarget;
   AlphaTargetLowering TLInfo;
-  
+
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
-  
+
+  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
+  // set this functions to ctor pointer at startup time if they are linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            TargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+
 public:
   AlphaTargetMachine(const Module &M, const std::string &FS);
 
@@ -46,7 +54,7 @@ public:
   virtual const AlphaRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
-  virtual AlphaTargetLowering* getTargetLowering() const { 
+  virtual AlphaTargetLowering* getTargetLowering() const {
     return const_cast<AlphaTargetLowering*>(&TLInfo);
   }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
@@ -56,7 +64,7 @@ public:
 
   static unsigned getJITMatchQuality();
   static unsigned getModuleMatchQuality(const Module &M);
-  
+
   // Pass Pipeline Configuration
   virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
   virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
@@ -75,6 +83,10 @@ public:
                                     CodeGenOpt::Level OptLevel,
                                     bool DumpAsm,
                                     JITCodeEmitter &JCE);
+
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 74b48ee6..7b73bb3 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -303,3 +303,17 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   O << ")";
   return false;
 }
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeAlphaAsmPrinter() { }
+}
+
+namespace {
+  static struct Register {
+    Register() {
+      AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass);
+    }
+  } Registrator;
+}
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 5814d27..c3554f6 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -59,6 +59,11 @@ int CBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CTargetMachine> X("c", "C backend");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeCBackendTarget() { }
+}
+
 namespace {
   /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
   /// any unnamed structure types that are used by the program, and merges
@@ -1449,6 +1454,17 @@ std::string CWriter::GetValueName(const Value *Operand) {
 /// writeInstComputationInline - Emit the computation for the specified
 /// instruction inline, with no destination provided.
 void CWriter::writeInstComputationInline(Instruction &I) {
+  // We can't currently support integer types other than 1, 8, 16, 32, 64.
+  // Validate this.
+  const Type *Ty = I.getType();
+  if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty &&
+        Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) {
+      cerr << "The C backend does not currently support integer "
+           << "types of widths other than 1, 8, 16, 32, 64.\n";
+      cerr << "This is being tracked as PR 4158.\n";
+      abort();
+  }
+
   // If this is a non-trivial bool computation, make sure to truncate down to
   // a 1 bit value.  This is important because we want "add i1 x, y" to return
   // "0" when x and y are true, not "2" for example.
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index da1bf07..26a8ece 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -287,12 +287,11 @@ namespace {
   /// LinuxAsmPrinter - SPU assembly printer, customized for Linux
   class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
     DwarfWriter *DW;
-    MachineModuleInfo *MMI;
   public:
     explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
                              const TargetAsmInfo *T, CodeGenOpt::Level F,
                              bool V)
-      : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+      : SPUAsmPrinter(O, TM, T, F, V), DW(0) {}
 
     virtual const char *getPassName() const {
       return "STI CBEA SPU Assembly Printer";
@@ -490,12 +489,8 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
 
 bool LinuxAsmPrinter::doInitialization(Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
-  SwitchToTextSection("\t.text");
-  // Emit initial debug information.
   DW = getAnalysisIfAvailable<DwarfWriter>();
-  assert(DW && "Dwarf Writer is not available");
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  DW->BeginModule(&M, MMI, O, this, TAI);
+  SwitchToTextSection("\t.text");
   return Result;
 }
 
@@ -621,3 +616,17 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
                                             bool verbose) {
   return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
 }
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeCellSPUAsmPrinter() { }
+}
+
+namespace {
+  static struct Register {
+    Register() {
+      SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass);
+    }
+  } Registrator;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 864a914..b286443 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -249,6 +249,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
   setOperationAction(ISD::MUL,  MVT::i32,    Legal);
   setOperationAction(ISD::MUL,  MVT::i64,    Legal);
 
+  // Expand double-width multiplication
+  // FIXME: It would probably be reasonable to support some of these operations
+  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
+  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
+  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
+  setOperationAction(ISD::MULHS,     MVT::i64, Expand);
+
   // Need to custom handle (some) common i8, i64 math ops
   setOperationAction(ISD::ADD,  MVT::i8,     Custom);
   setOperationAction(ISD::ADD,  MVT::i64,    Legal);
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
index ff88ed8..2868ff7 100644
--- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -41,7 +41,6 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
 
   SupportsDebugInformation = true;
   NeedsSet = true;
-  SupportsMacInfoSection = false;
   DwarfAbbrevSection =  "\t.section        .debug_abbrev,\"\",@progbits";
   DwarfInfoSection =    "\t.section        .debug_info,\"\",@progbits";
   DwarfLineSection =    "\t.section        .debug_line,\"\",@progbits";
@@ -52,7 +51,7 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
   DwarfLocSection =     "\t.section        .debug_loc,\"\",@progbits";
   DwarfARangesSection = "\t.section        .debug_aranges,\"\",@progbits";
   DwarfRangesSection =  "\t.section        .debug_ranges,\"\",@progbits";
-  DwarfMacInfoSection = "\t.section        .debug_macinfo,\"\",progbits";
+  DwarfMacroInfoSection = 0;  // macro info not supported.
 
   // Exception handling is not supported on CellSPU (think about it: you only
   // have 256K for code+data. Would you support exception handling?)
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 7fa9022..c675ebb 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -23,20 +23,20 @@
 
 using namespace llvm;
 
-/// CellSPUTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library.  In particular, it seems that it is not possible to get
-/// things to work on Win32 without this.  Though it is unused, do not
-/// remove it.
-extern "C" int CellSPUTargetMachineModule;
-int CellSPUTargetMachineModule = 0;
-
 namespace {
   // Register the targets
   RegisterTarget<SPUTargetMachine>
   CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
 }
 
+// No assembler printer by default
+SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeCellSPUTarget() { }
+}
+
 const std::pair<unsigned, int> *
 SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
   NumEntries = 1;
@@ -93,6 +93,9 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel,
                                           bool Verbose,
                                           raw_ostream &Out) {
-  PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose));
+  // Output assembly language.
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
   return false;
 }
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index cd39203..d8fe300 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -35,10 +35,18 @@ class SPUTargetMachine : public LLVMTargetMachine {
   SPUFrameInfo        FrameInfo;
   SPUTargetLowering   TLInfo;
   InstrItineraryData  InstrItins;
-  
+
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
-  
+
+  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
+  // set this functions to ctor pointer at startup time if they are linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            SPUTargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+
 public:
   SPUTargetMachine(const Module &M, const std::string &FS);
 
@@ -78,7 +86,7 @@ public:
     return &DataLayout;
   }
 
-  virtual const InstrItineraryData getInstrItineraryData() const {  
+  virtual const InstrItineraryData getInstrItineraryData() const {
     return InstrItins;
   }
   
@@ -88,6 +96,10 @@ public:
   virtual bool addAssemblyEmitter(PassManagerBase &PM,
                                   CodeGenOpt::Level OptLevel,
                                   bool Verbose, raw_ostream &Out);
+
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 04a6829..1feea96 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -82,6 +82,11 @@ int CppBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeCppBackendTarget() { }
+}
+
 namespace {
   typedef std::vector<const Type*> TypeList;
   typedef std::map<const Type*,std::string> TypeMap;
diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp
index 05d2351..d7d675a 100644
--- a/lib/Target/DarwinTargetAsmInfo.cpp
+++ b/lib/Target/DarwinTargetAsmInfo.cpp
@@ -50,6 +50,53 @@ DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM)
   ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None);
   DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced",
                                     SectionFlags::Writeable);
+    
+  
+  // Common settings for all Darwin targets.
+  // Syntax:
+  GlobalPrefix = "_";
+  PrivateGlobalPrefix = "L";
+  LessPrivateGlobalPrefix = "l";  // Marker for some ObjC metadata
+  StringConstantPrefix = "\1LC";
+  NeedsSet = true;
+  NeedsIndirectEncoding = true;
+  AllowQuotesInName = true;
+  HasSingleParameterDotFile = false;
+
+  // In non-PIC modes, emit a special label before jump tables so that the
+  // linker can perform more accurate dead code stripping.  We do not check the
+  // relocation model here since it can be overridden later.
+  JumpTableSpecialLabelPrefix = "l";
+    
+  // Directives:
+  WeakDefDirective = "\t.weak_definition ";
+  WeakRefDirective = "\t.weak_reference ";
+  HiddenDirective = "\t.private_extern ";
+    
+  // Sections:
+  CStringSection = "\t.cstring";
+  JumpTableDataSection = "\t.const\n";
+  BSSSection = 0;
+
+  if (TM.getRelocationModel() == Reloc::Static) {
+    StaticCtorsSection = ".constructor";
+    StaticDtorsSection = ".destructor";
+  } else {
+    StaticCtorsSection = ".mod_init_func";
+    StaticDtorsSection = ".mod_term_func";
+  }
+    
+  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+  DwarfMacroInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
 }
 
 /// emitUsedDirectiveFor - On Darwin, internally linked data beginning with
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index fc54e23..662c667 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -374,3 +374,18 @@ FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
                                               bool verbose) {
   return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
 }
+
+namespace {
+  static struct Register {
+    Register() {
+      IA64TargetMachine::registerAsmPrinter(createIA64CodePrinterPass);
+    }
+  } Registrator;
+}
+
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeIA64AsmPrinter() { }
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
index 34a0686..c545b9c 100644
--- a/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -107,6 +107,10 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
   setOperationAction(ISD::VAARG             , MVT::Other, Custom);
   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
+
+  // FIXME: These should be legal
+  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
   
   // Use the default implementation.
   setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 878a00a..0b93ee5 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -19,16 +19,18 @@
 #include "llvm/Target/TargetMachineRegistry.h"
 using namespace llvm;
 
-/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library.  In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this.  Though it is unused, do not remove it.
-extern "C" int IA64TargetMachineModule;
-int IA64TargetMachineModule = 0;
-
-static RegisterTarget<IA64TargetMachine> X("ia64", 
+// Register the target
+static RegisterTarget<IA64TargetMachine> X("ia64",
                                            "IA-64 (Itanium) [experimental]");
 
+// No assembler printer by default
+IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeIA64Target() { }
+}
+
 const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
   return new IA64TargetAsmInfo(*this);
 }
@@ -88,7 +90,10 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                            CodeGenOpt::Level OptLevel,
                                            bool Verbose,
                                            raw_ostream &Out) {
-  PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose));
+  // Output assembly language.
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
   return false;
 }
 
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
index 29d625c..a64da9f 100644
--- a/lib/Target/IA64/IA64TargetMachine.h
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -30,24 +30,32 @@ class IA64TargetMachine : public LLVMTargetMachine {
   TargetFrameInfo    FrameInfo;
   //IA64JITInfo      JITInfo;
   IA64TargetLowering TLInfo;
-  
+
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
 
+  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
+  // set this functions to ctor pointer at startup time if they are linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            IA64TargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+
 public:
   IA64TargetMachine(const Module &M, const std::string &FS);
 
   virtual const IA64InstrInfo      *getInstrInfo() const { return &InstrInfo; }
   virtual const TargetFrameInfo    *getFrameInfo() const { return &FrameInfo; }
   virtual const IA64Subtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual       IA64TargetLowering *getTargetLowering() const { 
+  virtual       IA64TargetLowering *getTargetLowering() const {
     return const_cast<IA64TargetLowering*>(&TLInfo);
   }
   virtual const IA64RegisterInfo   *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();
   }
   virtual const TargetData       *getTargetData() const { return &DataLayout; }
-  
+
   static unsigned getModuleMatchQuality(const Module &M);
 
   // Pass Pipeline Configuration
@@ -56,6 +64,10 @@ public:
   virtual bool addAssemblyEmitter(PassManagerBase &PM,
                                   CodeGenOpt::Level OptLevel,
                                   bool Verbose, raw_ostream &Out);
+
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
 };
 } // End llvm namespace
 
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 37e5b1e..0aff14f 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -55,6 +55,11 @@ int MSILTargetMachineModule = 0;
 
 static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeMSILTarget() { }
+}
+
 bool MSILModule::runOnModule(Module &M) {
   ModulePtr = &M;
   TD = &getAnalysis<TargetData>();
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 7886946..0f5244d 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -35,6 +35,11 @@ int MSP430TargetMachineModule = 0;
 static RegisterTarget<MSP430TargetMachine>
 X("msp430", "MSP430 [experimental]");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeMSP430Target() { }
+}
+
 MSP430TargetMachine::MSP430TargetMachine(const Module &M,
                                          const std::string &FS) :
   Subtarget(*this, M, FS),
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index dfb6238..077ec96 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -578,3 +578,17 @@ doFinalization(Module &M)
 
   return AsmPrinter::doFinalization(M);
 }
+
+namespace {
+  static struct Register {
+    Register() {
+      MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass);
+    }
+  } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeMipsAsmPrinter() { }
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 4517cfc..42afceb 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -95,6 +95,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
   setOperationAction(ISD::JumpTable,          MVT::i32,   Custom);
   setOperationAction(ISD::ConstantPool,       MVT::i32,   Custom);
   setOperationAction(ISD::SELECT,             MVT::f32,   Custom);
+  setOperationAction(ISD::SELECT,             MVT::f64,   Custom);
   setOperationAction(ISD::SELECT,             MVT::i32,   Custom);
   setOperationAction(ISD::SETCC,              MVT::f32,   Custom);
   setOperationAction(ISD::SETCC,              MVT::f64,   Custom);
@@ -122,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
   setOperationAction(ISD::SRA_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::SRL_PARTS,         MVT::i32,   Expand);
   setOperationAction(ISD::FCOPYSIGN,         MVT::f32,   Expand);
+  setOperationAction(ISD::FCOPYSIGN,         MVT::f64,   Expand);
 
   // We don't have line number support yet.
   setOperationAction(ISD::DBG_STOPPOINT,     MVT::Other, Expand);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ef524e3..83b9b62 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -31,6 +31,14 @@ int MipsTargetMachineModule = 0;
 static RegisterTarget<MipsTargetMachine>    X("mips", "Mips");
 static RegisterTarget<MipselTargetMachine>  Y("mipsel", "Mipsel");
 
+MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeMipsTarget() { }
+}
+
 const TargetAsmInfo *MipsTargetMachine::
 createTargetAsmInfo() const 
 {
@@ -125,9 +133,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
 // true if AssemblyEmitter is supported
 bool MipsTargetMachine::
 addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, 
-                   bool Verbose, raw_ostream &Out) 
-{
+                   bool Verbose, raw_ostream &Out)  {
   // Output assembly language.
-  PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose));
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
   return false;
 }
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a9e1df2..85fafad 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -33,10 +33,23 @@ namespace llvm {
   
   protected:
     virtual const TargetAsmInfo *createTargetAsmInfo() const;
-  
+  protected:
+    // To avoid having target depend on the asmprinter stuff libraries,
+    // asmprinter set this functions to ctor pointer at startup time if they are
+    // linked in.
+    typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                              MipsTargetMachine &tm,
+                                              CodeGenOpt::Level OptLevel,
+                                              bool verbose);
+    static AsmPrinterCtorFn AsmPrinterCtor;
+    
   public:
     MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
 
+    static void registerAsmPrinter(AsmPrinterCtorFn F) {
+      AsmPrinterCtor = F;
+    }
+    
     virtual const MipsInstrInfo   *getInstrInfo()     const 
     { return &InstrInfo; }
     virtual const TargetFrameInfo *getFrameInfo()     const 
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index f9a8801..ca1089b 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -48,27 +48,10 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   const Function *F = MF.getFunction();
   CurrentFnName = Mang->getValueName(F);
 
-  // Iterate over the first basic block instructions to find if it has a
-  // DebugLoc. If so emit .file directive. Instructions such as movlw do not
-  // have valid DebugLoc, so need to iterate over instructions.
-  MachineFunction::const_iterator I = MF.begin();
-  for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
-       MBBI != E; MBBI++) {
-    const DebugLoc DLoc = MBBI->getDebugLoc();
-    if (!DLoc.isUnknown()) {
-      GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
-      unsigned line = MF.getDebugLocTuple(DLoc).Line;
-      DbgInfo.EmitFileDirective(CU);
-      DbgInfo.SetFunctBeginLine(line);
-      break;
-    }
-  }
-
   // Emit the function frame (args and temps).
   EmitFunctionFrame(MF);
 
-  // Emit function begin debug directive.
-  DbgInfo.EmitFunctBeginDI(F);
+  DbgInfo.BeginFunction(MF);
 
   // Emit the autos section of function.
   EmitAutos(CurrentFnName);
@@ -89,9 +72,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   // Emit function start label.
   O << CurrentFnName << ":\n";
 
-  // For emitting line directives, we need to keep track of the current
-  // source line. When it changes then only emit the line directive.
-  unsigned CurLine = 0;
+  DebugLoc CurDL;
   O << "\n"; 
   // Print out code for the function.
   for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
@@ -109,12 +90,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
       // Emit the line directive if source line changed.
       const DebugLoc DL = II->getDebugLoc();
-      if (!DL.isUnknown()) {
-        unsigned line = MF.getDebugLocTuple(DL).Line;
-        if (line != CurLine) {
-          O << "\t.line " << line << "\n";
-          CurLine = line;
-        }
+      if (!DL.isUnknown() && DL != CurDL) {
+        DbgInfo.ChangeDebugLoc(MF, DL);
+        CurDL = DL;
       }
         
       // Print the assembly for the instruction.
@@ -123,7 +101,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   }
   
   // Emit function end debug directives.
-  DbgInfo.EmitFunctEndDI(F, CurLine);
+  DbgInfo.EndFunction(MF);
 
   return false;  // we didn't modify anything.
 }
@@ -226,7 +204,7 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
     I->setSection(TAI->SectionForGlobal(I)->getName());
   }
 
-  DbgInfo.Init(M);
+  DbgInfo.BeginModule(M);
   EmitFunctionDecls(M);
   EmitUndefinedVars(M);
   EmitDefinedVars(M);
@@ -313,8 +291,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
 bool PIC16AsmPrinter::doFinalization(Module &M) {
   printLibcallDecls();
   EmitRemainingAutos();
-  DbgInfo.EmitVarDebugInfo(M);
-  DbgInfo.EmitEOF();
+  DbgInfo.EndModule(M);
   O << "\n\t" << "END\n";
   bool Result = AsmPrinter::doFinalization(M);
   return Result;
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 8bdcf72..3ec5659 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -32,7 +32,7 @@ namespace llvm {
     explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
                              const TargetAsmInfo *T, CodeGenOpt::Level OL,
                              bool V)
-      : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) {
+      : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) {
       PTLI = TM.getTargetLowering();
       PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
     }
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index d7ebea7..27551cd 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -14,91 +14,23 @@
 #include "PIC16.h"
 #include "PIC16DebugInfo.h" 
 #include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
-                                     bool &HasAux, int Aux[], 
-                                     std::string &TypeName) {
-  if (Ty.isBasicType(Ty.getTag())) {
-    std::string Name = "";
-    Ty.getName(Name);
-    unsigned short BaseTy = GetTypeDebugNumber(Name);
-    TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-    TypeNo = TypeNo | (0xffff & BaseTy);
-  }
-  else if (Ty.isDerivedType(Ty.getTag())) {
-    switch(Ty.getTag())
-    {
-      case dwarf::DW_TAG_pointer_type:
-        TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-        TypeNo = TypeNo | PIC16Dbg::DT_PTR;
-        break;
-      default:
-        TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-    }
-    DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
-    PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
-  }
-  else if (Ty.isCompositeType(Ty.getTag())) {
-    switch (Ty.getTag()) {
-      case dwarf::DW_TAG_array_type: {
-        DICompositeType CTy = DICompositeType(Ty.getGV());
-        DIArray Elements = CTy.getTypeArray();
-        unsigned short size = 1;
-        unsigned short Dimension[4]={0,0,0,0};
-        for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
-          DIDescriptor Element = Elements.getElement(i);
-          if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
-            TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-            TypeNo = TypeNo | PIC16Dbg::DT_ARY;
-            DISubrange SubRange = DISubrange(Element.getGV());
-            Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
-            // Each dimension is represented by 2 bytes starting at byte 9.
-            Aux[8+i*2+0] = Dimension[i];
-            Aux[8+i*2+1] = Dimension[i] >> 8;
-            size = size * Dimension[i];
-          }
-        }
-        HasAux = true;
-        // In auxillary entry for array, 7th and 8th byte represent array size.
-        Aux[6] = size & 0xff;
-        Aux[7] = size >> 8;
-        DIType BaseType = CTy.getTypeDerivedFrom();
-        PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
-
-        break;
-      }
-      case dwarf:: DW_TAG_union_type:
-      case dwarf::DW_TAG_structure_type: {
-        DICompositeType CTy = DICompositeType(Ty.getGV());
-        TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-        if (Ty.getTag() == dwarf::DW_TAG_structure_type)
-          TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
-        else
-          TypeNo = TypeNo | PIC16Dbg::T_UNION;
-        CTy.getName(TypeName);
-        // UniqueSuffix is .number where number is obtained from 
-        // llvm.dbg.composite<number>.
-        std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
-        TypeName += UniqueSuffix;
-        unsigned short size = CTy.getSizeInBits()/8;
-        // 7th and 8th byte represent size.   
-        HasAux = true;
-        Aux[6] = size & 0xff;
-        Aux[7] = size >> 8;
-        break;
-      }
-      case dwarf::DW_TAG_enumeration_type: {
-        TypeNo = TypeNo << PIC16Dbg::S_BASIC;
-        TypeNo = TypeNo | PIC16Dbg::T_ENUM;
-        break;
-      }
-      default:
-        TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
-    }
-  }
+/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty.
+///
+void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
+                                      bool &HasAux, int Aux[], 
+                                      std::string &TagName) {
+  if (Ty.isBasicType(Ty.getTag())) 
+    PopulateBasicTypeInfo (Ty, TypeNo);
+  else if (Ty.isDerivedType(Ty.getTag())) 
+    PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+  else if (Ty.isCompositeType(Ty.getTag())) 
+    PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
   else {
     TypeNo = PIC16Dbg::T_NULL;
     HasAux = false;
@@ -106,7 +38,127 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
   return;
 }
 
+/// PopulateBasicTypeInfo- Populate TypeNo for basic type from Ty.
+///
+void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
+  std::string Name = "";
+  Ty.getName(Name);
+  unsigned short BaseTy = GetTypeDebugNumber(Name);
+  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+  TypeNo = TypeNo | (0xffff & BaseTy);
+}
+
+/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type 
+/// from Ty. Derived types are mostly pointers.
+///
+void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                            bool &HasAux, int Aux[],
+                                            std::string &TagName) {
+
+  switch(Ty.getTag())
+  {
+    case dwarf::DW_TAG_pointer_type:
+      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+      TypeNo = TypeNo | PIC16Dbg::DT_PTR;
+      break;
+    default:
+      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+  }
+  
+  // We also need to encode the the information about the base type of
+  // pointer in TypeNo.
+  DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+  PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty.
+void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                          bool &HasAux, int Aux[],
+                                          std::string &TagName) {
 
+  DICompositeType CTy = DICompositeType(Ty.getGV());
+  DIArray Elements = CTy.getTypeArray();
+  unsigned short size = 1;
+  unsigned short Dimension[4]={0,0,0,0};
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Elements.getElement(i);
+    if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
+      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+      TypeNo = TypeNo | PIC16Dbg::DT_ARY;
+      DISubrange SubRange = DISubrange(Element.getGV());
+      Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
+      // Each dimension is represented by 2 bytes starting at byte 9.
+      Aux[8+i*2+0] = Dimension[i];
+      Aux[8+i*2+1] = Dimension[i] >> 8;
+      size = size * Dimension[i];
+    }
+  }
+  HasAux = true;
+  // In auxillary entry for array, 7th and 8th byte represent array size.
+  Aux[6] = size & 0xff;
+  Aux[7] = size >> 8;
+  DIType BaseType = CTy.getTypeDerivedFrom();
+  PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[] , TagName for 
+/// structure or union.
+///
+void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty, 
+                                                  unsigned short &TypeNo,
+                                                  bool &HasAux, int Aux[],
+                                                  std::string &TagName) {
+  DICompositeType CTy = DICompositeType(Ty.getGV());
+  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+  if (Ty.getTag() == dwarf::DW_TAG_structure_type)
+    TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
+  else
+    TypeNo = TypeNo | PIC16Dbg::T_UNION;
+  CTy.getName(TagName);
+  // UniqueSuffix is .number where number is obtained from
+  // llvm.dbg.composite<number>.
+  std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+  TagName += UniqueSuffix;
+  unsigned short size = CTy.getSizeInBits()/8;
+  // 7th and 8th byte represent size.
+  HasAux = true;
+  Aux[6] = size & 0xff;
+  Aux[7] = size >> 8;
+}
+
+/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
+void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
+  TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+  TypeNo = TypeNo | PIC16Dbg::T_ENUM;
+}
+
+/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for 
+/// composite types from Ty.
+///
+void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                              bool &HasAux, int Aux[],
+                                              std::string &TagName) {
+  switch (Ty.getTag()) {
+    case dwarf::DW_TAG_array_type: {
+      PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+      break;
+    }
+    case dwarf:: DW_TAG_union_type:
+    case dwarf::DW_TAG_structure_type: {
+      PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+      break;
+    }
+    case dwarf::DW_TAG_enumeration_type: {
+      PopulateEnumTypeInfo (Ty, TypeNo);
+      break;
+    }
+    default:
+      TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+  }
+}
+
+/// GetTypeDebugNumber - Get debug type number for given type.
+///
 unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type)  {
   if (type == "char")
     return PIC16Dbg::T_CHAR;
@@ -127,8 +179,10 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type)  {
   else
     return 0;
 }
-
-short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
+ 
+/// GetStorageClass - Get storage class for give debug variable.
+///
+short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
   short ClassNo;
   if (PAN::isLocalName(DIGV.getGlobal()->getName())) {
     // Generating C_AUTO here fails due to error in linker. Change it once
@@ -142,12 +196,126 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
   return ClassNo;
 }
 
-void PIC16DbgInfo::Init(Module &M) {
-  // Do all debug related initializations here.
-  EmitFileDirective(M);
+/// BeginModule - Emit necessary debug info to start a Module and do other
+/// required initializations.
+void PIC16DbgInfo::BeginModule(Module &M) {
+  // Emit file directive for module.
+  GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
+  if (CU) {
+    EmitDebugDirectives = true;
+    SwitchToCU(CU);
+  }
+
+  // Emit debug info for decls of composite types.
   EmitCompositeTypeDecls(M);
 }
 
+/// Helper to find first valid debug loc for a function.
+///
+static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
+  DebugLoc DL;
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+         II != E; ++II) {
+      DL = II->getDebugLoc();
+      if (!DL.isUnknown())
+        return DL;
+    }
+  }
+  return DL;
+}
+
+/// BeginFunction - Emit necessary debug info to start a function.
+///
+void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
+  if (! EmitDebugDirectives) return;
+  
+  // Retreive the first valid debug Loc and process it.
+  const DebugLoc &DL = GetDebugLocForFunction(MF);
+  ChangeDebugLoc(MF, DL, true);
+
+  EmitFunctBeginDI(MF.getFunction());
+  
+  // Set current line to 0 so that.line directive is genearted after .bf.
+  CurLine = 0;
+}
+
+/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
+/// CurFile and CurLine may change as a result of this.
+///
+void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,  
+                                  const DebugLoc &DL, bool IsInBeginFunction) {
+  if (! EmitDebugDirectives) return;
+  assert (! DL.isUnknown()  && "can't change to invalid debug loc");
+
+  GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit;
+  unsigned line = MF.getDebugLocTuple(DL).Line;
+
+  SwitchToCU(CU);
+  SwitchToLine(line, IsInBeginFunction);
+}
+
+/// SwitchToLine - Emit line directive for a new line.
+///
+void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
+  if (CurLine == Line) return;
+  if (!IsInBeginFunction)  O << "\n\t.line " << Line << "\n";
+  CurLine = Line;
+}
+
+/// EndFunction - Emit .ef for end of function.
+///
+void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
+  if (! EmitDebugDirectives) return;
+  EmitFunctEndDI(MF.getFunction(), CurLine);
+}
+
+/// EndModule - Emit .eof for end of module.
+///
+void PIC16DbgInfo::EndModule(Module &M) {
+  if (! EmitDebugDirectives) return;
+  EmitVarDebugInfo(M);
+  if (CurFile != "") O << "\n\t.eof";
+}
+ 
+/// EmitCompositeTypeElements - Emit debug information for members of a 
+/// composite type.
+/// 
+void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
+                                              std::string UniqueSuffix) { 
+  unsigned long Value = 0;
+  DIArray Elements = CTy.getTypeArray();
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
+    DIDescriptor Element = Elements.getElement(i);
+    unsigned short TypeNo = 0;
+    bool HasAux = false;
+    int ElementAux[PIC16Dbg::AuxSize] = { 0 };
+    std::string TagName = "";
+    std::string ElementName;
+    GlobalVariable *GV = Element.getGV();
+    DIDerivedType DITy(GV);
+    DITy.getName(ElementName);
+    unsigned short ElementSize = DITy.getSizeInBits()/8;
+    // Get mangleddd name for this structure/union  element.
+    std::string MangMemName = ElementName + UniqueSuffix;
+    PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
+    short Class;
+    if( CTy.getTag() == dwarf::DW_TAG_union_type)
+      Class = PIC16Dbg::C_MOU;
+    else if  (CTy.getTag() == dwarf::DW_TAG_structure_type)
+      Class = PIC16Dbg::C_MOS;
+    EmitSymbol(MangMemName, Class, TypeNo, Value);
+    if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+      Value += ElementSize;
+    if (HasAux)
+      EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName);
+  }
+}
+
+/// EmitCompositeTypeDecls - Emit composite type declarations like structure 
+/// and union declarations.
+///
 void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
   for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
       E = M.getGlobalList().end(); I != E; I++) {
@@ -178,33 +346,10 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
 
         // Emit auxiliary debug information for structure/union tag. 
         EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
-        unsigned long Value = 0;
-        DIArray Elements = CTy.getTypeArray();
-        for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
-          DIDescriptor Element = Elements.getElement(i);
-          unsigned short TypeNo = 0;
-          bool HasAux = false;
-          int ElementAux[PIC16Dbg::AuxSize] = { 0 };
-          std::string TypeName = "";
-          std::string ElementName;
-          GlobalVariable *GV = Element.getGV();
-          DIDerivedType DITy(GV);
-          DITy.getName(ElementName);
-          unsigned short ElementSize = DITy.getSizeInBits()/8;
-          // Get mangleddd name for this structure/union  element.
-          std::string MangMemName = ElementName + UniqueSuffix;
-	  PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
-          short Class;
-          if( CTy.getTag() == dwarf::DW_TAG_union_type)
-            Class = PIC16Dbg::C_MOU;
-          else if  (CTy.getTag() == dwarf::DW_TAG_structure_type)
-            Class = PIC16Dbg::C_MOS;
-          EmitSymbol(MangMemName, Class, TypeNo, Value);
-          if (CTy.getTag() == dwarf::DW_TAG_structure_type)
-            Value += ElementSize;
-          if (HasAux)
-            EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
-        }
+
+        // Emit members.
+        EmitCompositeTypeElements (CTy, UniqueSuffix);
+
         // Emit mangled Symbol for end of structure/union.
         std::string EOSSymbol = ".eos" + UniqueSuffix;
         EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
@@ -214,6 +359,8 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
   }
 }
 
+/// EmitFunctBeginDI - Emit .bf for function.
+///
 void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
   std::string FunctName = F->getName();
   if (EmitDebugDirectives) {
@@ -221,16 +368,20 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
     std::string BlockBeginSym = ".bb." + FunctName;
 
     int BFAux[PIC16Dbg::AuxSize] = {0};
-    BFAux[4] = FunctBeginLine;
-    BFAux[5] = FunctBeginLine >> 8;
+    BFAux[4] = CurLine;
+    BFAux[5] = CurLine >> 8;
+
     // Emit debug directives for beginning of function.
     EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
     EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
+
     EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
     EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
   }
 }
 
+/// EmitFunctEndDI - Emit .ef for function end.
+///
 void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
   std::string FunctName = F->getName();
   if (EmitDebugDirectives) {
@@ -241,8 +392,8 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
     EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
     int EFAux[PIC16Dbg::AuxSize] = {0};
     // 5th and 6th byte stand for line number.
-    EFAux[4] = Line;
-    EFAux[5] = Line >> 8;
+    EFAux[4] = CurLine;
+    EFAux[5] = CurLine >> 8;
     EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
     EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
     EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
@@ -251,15 +402,18 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
 
 /// EmitAuxEntry - Emit Auxiliary debug information.
 ///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
-                                std::string tag) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
+                                std::string TagName) {
   O << "\n\t.dim " << VarName << ", 1" ;
-  if (tag != "")
-    O << ", " << tag;
-  for (int i = 0; i<num; i++)
+  // TagName is emitted in case of structure/union objects.
+  if (TagName != "")
+    O << ", " << TagName;
+  for (int i = 0; i<Num; i++)
     O << "," << Aux[i];
 }
 
+/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
+///
 void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
                               Type, unsigned long Value) {
   O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = " 
@@ -268,6 +422,8 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
     O  << ", value = " << Value;
 }
 
+/// EmitVarDebugInfo - Emit debug information for all variables.
+///
 void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
   GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
   if (!Root)
@@ -283,47 +439,45 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
       unsigned short TypeNo = 0;
       bool HasAux = false;
       int Aux[PIC16Dbg::AuxSize] = { 0 };
-      std::string TypeName = "";
+      std::string TagName = "";
       std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
-      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+      PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
       // Emit debug info only if type information is availaible.
       if (TypeNo != PIC16Dbg::T_NULL) {
         O << "\n\t.type " << VarName << ", " << TypeNo;
-        short ClassNo = getClass(DIGV);
+        short ClassNo = getStorageClass(DIGV);
         O << "\n\t.class " << VarName << ", " << ClassNo;
         if (HasAux) 
-          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
+          EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
       }
     }
   }
   O << "\n";
 }
 
-void PIC16DbgInfo::EmitFileDirective(Module &M) {
-  GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
-  if (CU) {
-    EmitDebugDirectives = true;
-    EmitFileDirective(CU, false);
-  }
-}
+/// SwitchToCU - Switch to a new compilation unit.
+///
+void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+  // Get the file path from CU.
+  DICompileUnit cu(CU);
+  std::string DirName, FileName;
+  std::string FilePath = cu.getDirectory(DirName) + "/" + 
+                         cu.getFilename(FileName);
 
-void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
-  std::string Dir, FN;
-  DICompileUnit DIUnit(CU);
-  std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
-  if ( File != CurFile ) {
-    if (EmitEof)
-      EmitEOF();
-    O << "\n\t.file\t\"" << File << "\"\n" ;
-    CurFile = File;
-  }
+  // Nothing to do if source file is still same.
+  if ( FilePath == CurFile ) return;
+
+  // Else, close the current one and start a new.
+  if (CurFile != "") O << "\n\t.eof";
+  O << "\n\t.file\t\"" << FilePath << "\"\n" ;
+  CurFile = FilePath;
+  CurLine = 0;
 }
 
+/// EmitEOF - Emit .eof for end of file.
+///
 void PIC16DbgInfo::EmitEOF() {
   if (CurFile != "")
     O << "\n\t.EOF";
 }
 
-void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
-  FunctBeginLine = line;
-}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index 9d50380..d126d85 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -20,6 +20,8 @@
 #include <map>
 
 namespace llvm {
+  class MachineFunction;
+  class DebugLoc;
   namespace PIC16Dbg {
     enum VarType {
       T_NULL,
@@ -94,33 +96,64 @@ namespace llvm {
     raw_ostream &O;
     const TargetAsmInfo *TAI;
     std::string CurFile;
+    unsigned CurLine;
+
     // EmitDebugDirectives is set if debug information is available. Default
     // value for it is false.
     bool EmitDebugDirectives;
-    unsigned FunctBeginLine;
+
   public:
     PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
-      CurFile = ""; 
+      CurFile = "";
+      CurLine = 0;
       EmitDebugDirectives = false; 
     }
-    void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
+
+    void BeginModule (Module &M);
+    void BeginFunction (const MachineFunction &MF);
+    void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL,
+                         bool IsInBeginFunction = false);
+    void EndFunction (const MachineFunction &MF);
+    void EndModule (Module &M);
+
+
+    private:
+    void SwitchToCU (GlobalVariable *CU);
+    void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
+
+    void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
                            int Aux[], std::string &TypeName);
-    unsigned GetTypeDebugNumber(std::string &type);
-    short getClass(DIGlobalVariable DIGV);
+    void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo);
+    void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo, 
+                                  bool &HasAux, int Aux[],
+                                  std::string &TypeName);
+
+    void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                    bool &HasAux, int Aux[],
+                                    std::string &TypeName);
+    void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                bool &HasAux, int Aux[],
+                                std::string &TypeName);
+
+    void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo,
+                                        bool &HasAux, int Aux[],
+                                        std::string &TypeName);
+    void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo);
+
+    unsigned GetTypeDebugNumber(std::string &Type);
+    short getStorageClass(DIGlobalVariable DIGV);
     void EmitFunctBeginDI(const Function *F);
-    void Init(Module &M);
     void EmitCompositeTypeDecls(Module &M);
+    void EmitCompositeTypeElements (DICompositeType CTy,
+                                    std::string UniqueSuffix);
     void EmitFunctEndDI(const Function *F, unsigned Line);
     void EmitAuxEntry(const std::string VarName, int Aux[], 
-                      int num = PIC16Dbg::AuxSize, std::string tag = "");
+                      int num = PIC16Dbg::AuxSize, std::string TagName = "");
     inline void EmitSymbol(std::string Name, short Class, 
                            unsigned short Type = PIC16Dbg::T_NULL, 
                            unsigned long Value = 0);
     void EmitVarDebugInfo(Module &M);
-    void EmitFileDirective(Module &M);
-    void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
     void EmitEOF();
-    void SetFunctBeginLine(unsigned line);
   };
 } // end namespace llvm;
 #endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ba465f3..f113a48 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -31,42 +31,72 @@ static const char *getIntrinsicName(unsigned opcode) {
   std::string Basename;
   switch(opcode) {
   default: assert (0 && "do not know intrinsic name");
+  // Arithmetic Right shift for integer types.
   case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
   case RTLIB::SRA_I16: Basename = "sra.i16"; break;
   case RTLIB::SRA_I32: Basename = "sra.i32"; break;
 
+  // Left shift for integer types.
   case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
   case RTLIB::SHL_I16: Basename = "sll.i16"; break;
   case RTLIB::SHL_I32: Basename = "sll.i32"; break;
 
+  // Logical Right Shift for integer types.
   case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
   case RTLIB::SRL_I16: Basename = "srl.i16"; break;
   case RTLIB::SRL_I32: Basename = "srl.i32"; break;
 
+  // Multiply for integer types.
   case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
   case RTLIB::MUL_I16: Basename = "mul.i16"; break;
   case RTLIB::MUL_I32: Basename = "mul.i32"; break;
 
+  // Signed division for integers.
   case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
   case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+
+  // Unsigned division for integers.
   case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
   case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
 
+  // Signed Modulas for integers.
   case RTLIB::SREM_I16: Basename = "srem.i16"; break;
   case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+
+  // Unsigned Modulas for integers.
   case RTLIB::UREM_I16: Basename = "urem.i16"; break;
   case RTLIB::UREM_I32: Basename = "urem.i32"; break;
 
-  case RTLIB::FPTOSINT_F32_I32:
-               Basename = "f32_to_si32"; break;
-  case RTLIB::SINTTOFP_I32_F32:
-               Basename = "si32_to_f32"; break;
+  //////////////////////
+  // LIBCALLS FOR FLOATS
+  //////////////////////
+
+  // Float to signed integrals
+  case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Signed integrals to float. char and int are first sign extended to i32 
+  // before being converted to float, so an I8_F32 or I16_F32 isn't required.
+  case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
+
+  // Float to Unsigned conversions.
+  // Signed conversion can be used for unsigned conversion as well.
+  // In signed and unsigned versions only the interpretation of the 
+  // MSB is different. Bit representation remains the same. 
+  case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
+  case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
+
+  // Unsigned to Float conversions. char and int are first zero extended 
+  // before being converted to float.
+  case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
                
+  // Floating point add, sub, mul, div.
   case RTLIB::ADD_F32: Basename = "add.f32"; break;
   case RTLIB::SUB_F32: Basename = "sub.f32"; break;
   case RTLIB::MUL_F32: Basename = "mul.f32"; break;
   case RTLIB::DIV_F32: Basename = "div.f32"; break;
-  
   }
   
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -83,7 +113,7 @@ static const char *getIntrinsicName(unsigned opcode) {
 // PIC16TargetLowering Constructor.
 PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   : TargetLowering(TM), TmpSize(0) {
-  
+ 
   Subtarget = &TM.getSubtarget<PIC16Subtarget>();
 
   addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
@@ -114,6 +144,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   // Signed division lib call names
   setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
   setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+
   // Unsigned division lib call names
   setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
   setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
@@ -121,15 +152,36 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   // Signed remainder lib call names
   setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
   setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+
   // Unsigned remainder lib call names
   setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
   setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
  
-  // Floating point operations
+  // Floating point to signed int conversions.
+  setLibcallName(RTLIB::FPTOSINT_F32_I8, 
+                 getIntrinsicName(RTLIB::FPTOSINT_F32_I8));
+  setLibcallName(RTLIB::FPTOSINT_F32_I16, 
+                 getIntrinsicName(RTLIB::FPTOSINT_F32_I16));
   setLibcallName(RTLIB::FPTOSINT_F32_I32, 
                  getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
+
+  // Signed int to floats.
   setLibcallName(RTLIB::SINTTOFP_I32_F32, 
                  getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
+
+  // Floating points to unsigned ints.
+  setLibcallName(RTLIB::FPTOUINT_F32_I8, 
+                 getIntrinsicName(RTLIB::FPTOUINT_F32_I8));
+  setLibcallName(RTLIB::FPTOUINT_F32_I16, 
+                 getIntrinsicName(RTLIB::FPTOUINT_F32_I16));
+  setLibcallName(RTLIB::FPTOUINT_F32_I32, 
+                 getIntrinsicName(RTLIB::FPTOUINT_F32_I32));
+
+  // Unsigned int to floats.
+  setLibcallName(RTLIB::UINTTOFP_I32_F32, 
+                 getIntrinsicName(RTLIB::UINTTOFP_I32_F32));
+
+  // Floating point add, sub, mul ,div.
   setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
   setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
   setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index bda6326..d4f46a4 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -37,6 +37,11 @@ X("pic16", "PIC16 14-bit [experimental].");
 static RegisterTarget<CooperTargetMachine> 
 Y("cooper", "PIC16 Cooper [experimental].");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializePIC16Target() { }
+}
+
 // PIC16TargetMachine - Traditional PIC16 Machine.
 PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
                                        bool Cooper)
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 7723982..c7bfb6d 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -646,19 +646,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
 bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
-
-  // Emit initial debug information.
+  DW = getAnalysisIfAvailable<DwarfWriter>();
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   assert(MMI);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  assert(DW && "DwarfWriter is not available");
-  DW->BeginModule(&M, MMI, O, this, TAI);
-
-  // GNU as handles section names wrapped in quotes
-  Mang->setUseQuotes(true);
-
   SwitchToSection(TAI->getTextSection());
-
   return Result;
 }
 
@@ -875,18 +866,9 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
   O << "\t.machine " << CPUDirectives[Directive] << '\n';
 
   bool Result = AsmPrinter::doInitialization(M);
-
-  // Emit initial debug information.
-  // We need this for Personality functions.
-  // AsmPrinter::doInitialization should have done this analysis.
+  DW = getAnalysisIfAvailable<DwarfWriter>();
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   assert(MMI);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  assert(DW && "DwarfWriter is not available");
-  DW->BeginModule(&M, MMI, O, this, TAI);
-
-  // Darwin wants symbols to be quoted if they have complex names.
-  Mang->setUseQuotes(true);
 
   // Prime text sections so they are adjacent.  This reduces the likelihood a
   // large data or debug section causes a branch to exceed 16M limit.
@@ -1202,3 +1184,9 @@ namespace {
 
 extern "C" int PowerPCAsmPrinterForceLink;
 int PowerPCAsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializePowerPCAsmPrinter() { }
+}
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
index c69e591..ebffd69 100644
--- a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -19,59 +19,19 @@
 using namespace llvm;
 using namespace llvm::dwarf;
 
-PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM) :
   PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
   PCSymbol = ".";
   CommentString = ";";
-  GlobalPrefix = "_";
-  PrivateGlobalPrefix = "L";
-  LessPrivateGlobalPrefix = "l";
-  StringConstantPrefix = "\1LC";
   ConstantPoolSection = "\t.const\t";
-  JumpTableDataSection = ".const";
-  CStringSection = "\t.cstring";
-  if (TM.getRelocationModel() == Reloc::Static) {
-    StaticCtorsSection = ".constructor";
-    StaticDtorsSection = ".destructor";
-  } else {
-    StaticCtorsSection = ".mod_init_func";
-    StaticDtorsSection = ".mod_term_func";
-  }
-  HasSingleParameterDotFile = false;
-  SwitchToSectionDirective = "\t.section ";
   UsedDirective = "\t.no_dead_strip\t";
-  WeakDefDirective = "\t.weak_definition ";
-  WeakRefDirective = "\t.weak_reference ";
-  HiddenDirective = "\t.private_extern ";
   SupportsExceptionHandling = true;
-  NeedsIndirectEncoding = true;
-  NeedsSet = true;
-  BSSSection = 0;
   
   DwarfEHFrameSection =
-  ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+   ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
   DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
   GlobalEHDirective = "\t.globl\t";
   SupportsWeakOmittedEHFrame = false;
-
-  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
-  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
-  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
-  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
-  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
-  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
-  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
-  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
-  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
-  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
-  DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
-  
-  // In non-PIC modes, emit a special label before jump tables so that the
-  // linker can perform more accurate dead code stripping.
-  if (TM.getRelocationModel() != Reloc::PIC_) {
-    // Emit a local label that is preserved until the linker runs.
-    JumpTableSpecialLabelPrefix = "l";
-  }
 }
 
 /// PreferredEHDataFormat - This hook allows the target to select data
@@ -131,7 +91,7 @@ PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
   DwarfLocSection =     "\t.section\t.debug_loc,\"\",@progbits";
   DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
   DwarfRangesSection =  "\t.section\t.debug_ranges,\"\",@progbits";
-  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+  DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
 
   PCSymbol = ".";
 
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ef3f0fc..3e89885 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -35,6 +35,11 @@ X("ppc32", "PowerPC 32");
 static RegisterTarget<PPC64TargetMachine>
 Y("ppc64", "PowerPC 64");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializePowerPCTarget() { }
+}
+
 // No assembler printer by default
 PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
 
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 61707f5..6a2fdca 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "asm-printer"
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
@@ -24,8 +25,6 @@
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/Statistic.h"
@@ -88,6 +87,7 @@ FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
   return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
 }
 
+
 /// runOnMachineFunction - This uses the printInstruction()
 /// method to print assembly for each instruction.
 ///
@@ -353,3 +353,17 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
 
   return false;
 }
+
+namespace {
+  static struct Register {
+    Register() {
+      SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
+    }
+  } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeSparcAsmPrinter() { }
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index eda0309..fd0f124 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -18,17 +18,18 @@
 #include "llvm/Target/TargetMachineRegistry.h"
 using namespace llvm;
 
-/// SparcTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library.  In particular, it seems that it is not possible to get
-/// things to work on Win32 without this.  Though it is unused, do not
-/// remove it.
-extern "C" int SparcTargetMachineModule;
-int SparcTargetMachineModule = 0;
-
 // Register the target.
 static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
 
+// No assembler printer by default
+SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeSparcTarget() { }
+}
+
 const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
   // FIXME: Handle Solaris subtarget someday :)
   return new SparcELFTargetAsmInfo(*this);
@@ -89,6 +90,8 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                             bool Verbose,
                                             raw_ostream &Out) {
   // Output assembly language.
-  PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+  assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+  if (AsmPrinterCtor)
+    PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
   return false;
 }
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 40b44f2..8afcc73 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,6 +35,14 @@ class SparcTargetMachine : public LLVMTargetMachine {
 protected:
   virtual const TargetAsmInfo *createTargetAsmInfo() const;
   
+  // To avoid having target depend on the asmprinter stuff libraries, asmprinter
+  // set this functions to ctor pointer at startup time if they are linked in.
+  typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+                                            TargetMachine &tm,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool verbose);
+  static AsmPrinterCtorFn AsmPrinterCtor;
+  
 public:
   SparcTargetMachine(const Module &M, const std::string &FS);
 
@@ -56,6 +64,10 @@ public:
   virtual bool addAssemblyEmitter(PassManagerBase &PM,
                                   CodeGenOpt::Level OptLevel,
                                   bool Verbose, raw_ostream &Out);
+  
+  static void registerAsmPrinter(AsmPrinterCtorFn F) {
+    AsmPrinterCtor = F;
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
index 6a2de6f..3f5f1bd 100644
--- a/lib/Target/TargetAsmInfo.cpp
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -24,10 +24,10 @@
 #include "llvm/Support/Dwarf.h"
 #include <cctype>
 #include <cstring>
-
 using namespace llvm;
 
-void TargetAsmInfo::fillDefaultValues() {
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
+: TM(tm) {
   BSSSection = "\t.bss";
   BSSSection_ = 0;
   ReadOnlySection = 0;
@@ -58,6 +58,7 @@ void TargetAsmInfo::fillDefaultValues() {
   InlineAsmEnd = "#NO_APP";
   AssemblerDialect = 0;
   StringConstantPrefix = ".str";
+  AllowQuotesInName = false;
   ZeroDirective = "\t.zero\t";
   ZeroDirectiveSuffix = 0;
   AsciiDirective = "\t.ascii\t";
@@ -102,7 +103,6 @@ void TargetAsmInfo::fillDefaultValues() {
   SupportsExceptionHandling = false;
   DwarfRequiresFrameSection = true;
   DwarfUsesInlineInfoSection = false;
-  SupportsMacInfoSection = true;
   NonLocalEHFrameLabel = false;
   GlobalEHDirective = 0;
   SupportsWeakOmittedEHFrame = true;
@@ -118,7 +118,7 @@ void TargetAsmInfo::fillDefaultValues() {
   DwarfLocSection = ".debug_loc";
   DwarfARangesSection = ".debug_aranges";
   DwarfRangesSection = ".debug_ranges";
-  DwarfMacInfoSection = ".debug_macinfo";
+  DwarfMacroInfoSection = ".debug_macinfo";
   DwarfEHFrameSection = ".eh_frame";
   DwarfExceptionSection = ".gcc_except_table";
   AsmTransCBE = 0;
@@ -126,11 +126,6 @@ void TargetAsmInfo::fillDefaultValues() {
   DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable);
 }
 
-TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
-  : TM(tm) {
-  fillDefaultValues();
-}
-
 TargetAsmInfo::~TargetAsmInfo() {
 }
 
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index dbd03d8..368bcaa 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 
 add_partially_linked_object(LLVMX86AsmPrinter
   X86ATTAsmPrinter.cpp
+  X86ATTInstPrinter.cpp
   X86AsmPrinter.cpp
   X86IntelAsmPrinter.cpp
   )
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index 8afe2ea..60ed4f0 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -26,8 +26,10 @@
 #include "llvm/Type.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetAsmInfo.h"
@@ -36,6 +38,9 @@ using namespace llvm;
 
 STATISTIC(EmittedInsts, "Number of machine instrs printed");
 
+static cl::opt<bool> NewAsmPrinter("experimental-asm-printer",
+                                   cl::Hidden);
+
 static std::string getPICLabelString(unsigned FnNum,
                                      const TargetAsmInfo *TAI,
                                      const X86Subtarget* Subtarget) {
@@ -266,7 +271,7 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
     O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
 
   // Emit post-function debug information.
-  if (TAI->doesSupportDebugInformation())
+  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
     DW->EndFunction(&MF);
 
   // Print out jump tables referenced by the function.
@@ -291,6 +296,136 @@ static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
   return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
 }
 
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.  These print slightly differently, for
+/// example, a $ is not emitted.
+void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  switch (MO.getType()) {
+  default: assert(0 && "Unknown pcrel immediate operand");
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
+    return;
+      
+  case MachineOperand::MO_GlobalAddress: {
+    const GlobalValue *GV = MO.getGlobal();
+    std::string Name = Mang->getValueName(GV);
+    decorateName(Name, GV);
+    
+    bool needCloseParen = false;
+    if (Name[0] == '$') {
+      // The name begins with a dollar-sign. In order to avoid having it look
+      // like an integer immediate to the assembler, enclose it in parens.
+      O << '(';
+      needCloseParen = true;
+    }
+    
+    if (shouldPrintStub(TM, Subtarget)) {
+      // Link-once, declaration, or Weakly-linked global variables need
+      // non-lazily-resolved stubs
+      if (GV->isDeclaration() || GV->isWeakForLinker()) {
+        // Dynamically-resolved functions need a stub for the function.
+        if (isa<Function>(GV)) {
+          // Function stubs are no longer needed for Mac OS X 10.5 and up.
+          if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
+            O << Name;
+          } else {
+            FnStubs.insert(Name);
+            printSuffixedName(Name, "$stub");
+          }
+        } else if (GV->hasHiddenVisibility()) {
+          if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+            // Definition is not definitely in the current translation unit.
+            O << Name;
+          else {
+            HiddenGVStubs.insert(Name);
+            printSuffixedName(Name, "$non_lazy_ptr");
+          }
+        } else {
+          GVStubs.insert(Name);
+          printSuffixedName(Name, "$non_lazy_ptr");
+        }
+      } else {
+        if (GV->hasDLLImportLinkage())
+          O << "__imp_";
+        O << Name;
+      }
+    } else {
+      if (GV->hasDLLImportLinkage()) {
+        O << "__imp_";
+      }
+      O << Name;
+      
+      if (shouldPrintPLT(TM, Subtarget)) {
+        // Assemble call via PLT for externally visible symbols
+        if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+            !GV->hasLocalLinkage())
+          O << "@PLT";
+      }
+      if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+        // Save function name for later type emission
+        FnStubs.insert(Name);
+    }
+    
+    if (GV->hasExternalWeakLinkage())
+      ExtWeakSymbols.insert(GV);
+    
+    printOffset(MO.getOffset());
+    
+    if (needCloseParen)
+      O << ')';
+    return;
+  }
+      
+  case MachineOperand::MO_ExternalSymbol: {
+    bool needCloseParen = false;
+    std::string Name(TAI->getGlobalPrefix());
+    Name += MO.getSymbolName();
+    // Print function stub suffix unless it's Mac OS X 10.5 and up.
+    if (shouldPrintStub(TM, Subtarget) && 
+        !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+      FnStubs.insert(Name);
+      printSuffixedName(Name, "$stub");
+      return;
+    }
+    
+    if (Name[0] == '$') {
+      // The name begins with a dollar-sign. In order to avoid having it look
+      // like an integer immediate to the assembler, enclose it in parens.
+      O << '(';
+      needCloseParen = true;
+    }
+    
+    O << Name;
+    
+    if (shouldPrintPLT(TM, Subtarget)) {
+      std::string GOTName(TAI->getGlobalPrefix());
+      GOTName+="_GLOBAL_OFFSET_TABLE_";
+      if (Name == GOTName)
+        // HACK! Emit extra offset to PC during printing GOT offset to
+        // compensate for the size of popl instruction. The resulting code
+        // should look like:
+        //   call .piclabel
+        // piclabel:
+        //   popl %some_register
+        //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
+        O << " + [.-"
+          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+      
+      O << "@PLT";
+    }
+    
+    if (needCloseParen)
+      O << ')';
+    
+    return;
+  }
+  }
+}
+
 void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
                                     const char *Modifier, bool NotRIPRel) {
   const MachineOperand &MO = MI->getOperand(OpNo);
@@ -312,14 +447,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 
   case MachineOperand::MO_Immediate:
     if (!Modifier || (strcmp(Modifier, "debug") &&
-                      strcmp(Modifier, "mem") &&
-                      strcmp(Modifier, "call")))
+                      strcmp(Modifier, "mem")))
       O << '$';
     O << MO.getImm();
     return;
-  case MachineOperand::MO_MachineBasicBlock:
-    printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
-    return;
   case MachineOperand::MO_JumpTableIndex: {
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
     if (!isMemOp) O << '$';
@@ -359,8 +490,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     return;
   }
   case MachineOperand::MO_GlobalAddress: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
+    bool isMemOp = Modifier && !strcmp(Modifier, "mem");
     bool needCloseParen = false;
 
     const GlobalValue *GV = MO.getGlobal();
@@ -369,7 +499,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
       // If GV is an alias then use the aliasee for determining
       // thread-localness.
       if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-        GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+        GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
     }
 
     bool isThreadLocal = GVar && GVar->isThreadLocal();
@@ -377,7 +507,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     std::string Name = Mang->getValueName(GV);
     decorateName(Name, GV);
 
-    if (!isMemOp && !isCallOp)
+    if (!isMemOp)
       O << '$';
     else if (Name[0] == '$') {
       // The name begins with a dollar-sign. In order to avoid having it look
@@ -391,15 +521,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
       // non-lazily-resolved stubs
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
         // Dynamically-resolved functions need a stub for the function.
-        if (isCallOp && isa<Function>(GV)) {
-          // Function stubs are no longer needed for Mac OS X 10.5 and up.
-          if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
-            O << Name;
-          } else {
-            FnStubs.insert(Name);
-            printSuffixedName(Name, "$stub");
-          }
-        } else if (GV->hasHiddenVisibility()) {
+        if (GV->hasHiddenVisibility()) {
           if (!GV->isDeclaration() && !GV->hasCommonLinkage())
             // Definition is not definitely in the current translation unit.
             O << Name;
@@ -417,25 +539,12 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
         O << Name;
       }
 
-      if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+      if (TM.getRelocationModel() == Reloc::PIC_)
         O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
     } else {
-      if (GV->hasDLLImportLinkage()) {
+      if (GV->hasDLLImportLinkage())
         O << "__imp_";
-      }
       O << Name;
-
-      if (isCallOp) {
-        if (shouldPrintPLT(TM, Subtarget)) {
-          // Assemble call via PLT for externally visible symbols
-          if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
-              !GV->hasLocalLinkage())
-            O << "@PLT";
-        }
-        if (Subtarget->isTargetCygMing() && GV->isDeclaration())
-          // Save function name for later type emission
-          FnStubs.insert(Name);
-      }
     }
 
     if (GV->hasExternalWeakLinkage())
@@ -443,6 +552,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 
     printOffset(MO.getOffset());
 
+    if (needCloseParen)
+      O << ')';
+    
+    bool isRIPRelative = false;
     if (isThreadLocal) {
       TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
       switch (model) {
@@ -456,7 +569,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
       case TLSModel::InitialExec:
         if (Subtarget->is64Bit()) {
           assert (!NotRIPRel);
-          O << "@GOTTPOFF(%rip)";
+          O << "@GOTTPOFF";
+          isRIPRelative = true;
         } else {
           O << "@INDNTPOFF";
         }
@@ -476,43 +590,33 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
           O << "@GOT";
         else
           O << "@GOTOFF";
-      } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+      } else if (Subtarget->isPICStyleRIPRel() &&
+                 !NotRIPRel) {
         if (TM.getRelocationModel() != Reloc::Static) {
           if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
             O << "@GOTPCREL";
-
-          if (needCloseParen) {
-            needCloseParen = false;
-            O << ')';
-          }
         }
-
-        // Use rip when possible to reduce code size, except when
-        // index or base register are also part of the address. e.g.
-        // foo(%rip)(%rcx,%rax,4) is not legal
-        O << "(%rip)";
+        
+        isRIPRelative = true;
       }
     }
 
-    if (needCloseParen)
-      O << ')';
-
+    // Use rip when possible to reduce code size, except when
+    // index or base register are also part of the address. e.g.
+    // foo(%rip)(%rcx,%rax,4) is not legal.
+    if (isRIPRelative)
+      O << "(%rip)";
+    
     return;
   }
   case MachineOperand::MO_ExternalSymbol: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
     bool needCloseParen = false;
     std::string Name(TAI->getGlobalPrefix());
     Name += MO.getSymbolName();
+
     // Print function stub suffix unless it's Mac OS X 10.5 and up.
-    if (isCallOp && shouldPrintStub(TM, Subtarget) && 
-        !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
-      FnStubs.insert(Name);
-      printSuffixedName(Name, "$stub");
-      return;
-    }
-    if (!isMemOp && !isCallOp)
+    if (!isMemOp)
       O << '$';
     else if (Name[0] == '$') {
       // The name begins with a dollar-sign. In order to avoid having it look
@@ -536,17 +640,13 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
         //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
         O << " + [.-"
           << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-
-      if (isCallOp)
-        O << "@PLT";
     }
 
     if (needCloseParen)
       O << ')';
 
-    if (!isCallOp && Subtarget->isPICStyleRIPRel())
+    if (Subtarget->isPICStyleRIPRel())
       O << "(%rip)";
-
     return;
   }
   default:
@@ -673,8 +773,7 @@ void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
     printBasicBlockLabel(MBB, false, false, false);
 }
 
-bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
-                                         const char Mode) {
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) {
   unsigned Reg = MO.getReg();
   switch (Mode) {
   default: return true;  // Unknown mode.
@@ -758,38 +857,85 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+  // Convert registers in the addr mode according to subreg64.
+  for (unsigned i = 0; i != 4; ++i) {
+    if (!MI->getOperand(i).isReg()) continue;
+    
+    unsigned Reg = MI->getOperand(i).getReg();
+    if (Reg == 0) continue;
+    
+    MI->getOperand(i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+  }
+}
+
 /// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
 /// AT&T syntax to the current output stream.
 ///
 void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
 
+  if (NewAsmPrinter) {
+    if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+      O << "\t";
+      printInlineAsm(MI);
+      return;
+    } else if (MI->isLabel()) {
+      printLabel(MI);
+      return;
+    } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {
+      printDeclare(MI);
+      return;
+    } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+      printImplicitDef(MI);
+      return;
+    }
+    
+    O << "NEW: ";
+    MCInst TmpInst;
+    
+    TmpInst.setOpcode(MI->getOpcode());
+    
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+      
+      MCOperand MCOp;
+      if (MO.isReg()) {
+        MCOp.MakeReg(MO.getReg());
+      } else if (MO.isImm()) {
+        MCOp.MakeImm(MO.getImm());
+      } else if (MO.isMBB()) {
+        MCOp.MakeMBBLabel(getFunctionNumber(), MO.getMBB()->getNumber());
+      } else {
+        assert(0 && "Unimp");
+      }
+      
+      TmpInst.addOperand(MCOp);
+    }
+    
+    switch (TmpInst.getOpcode()) {
+    case X86::LEA64_32r:
+      // Handle the 'subreg rewriting' for the lea64_32mem operand.
+      lower_lea64_32mem(&TmpInst, 1);
+      break;
+    }
+    
+    // FIXME: Convert TmpInst.
+    printInstruction(&TmpInst);
+    O << "OLD: ";
+  }
+  
   // Call the autogenerated instruction printer routines.
   printInstruction(MI);
 }
 
 /// doInitialization
 bool X86ATTAsmPrinter::doInitialization(Module &M) {
-
-  bool Result = AsmPrinter::doInitialization(M);
-
-  if (TAI->doesSupportDebugInformation()) {
-    // Let PassManager know we need debug information and relay
-    // the MachineModuleInfo address on to DwarfWriter.
-    // AsmPrinter::doInitialization did this analysis.
+  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) 
     MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-    DW = getAnalysisIfAvailable<DwarfWriter>();
-    DW->BeginModule(&M, MMI, O, this, TAI);
-  }
-
-  // Darwin wants symbols to be quoted if they have complex names.
-  if (Subtarget->isTargetDarwin())
-    Mang->setUseQuotes(true);
-
-  return Result;
+  return AsmPrinter::doInitialization(M);
 }
 
-
 void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   const TargetData *TD = TM.getTargetData();
 
@@ -1040,8 +1186,8 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
     }
 
     // Emit final debug information.
-    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
-    DW->EndModule();
+    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+      DW->EndModule();
 
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
@@ -1060,12 +1206,12 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
     }
 
     // Emit final debug information.
-    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
-    DW->EndModule();
+    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+      DW->EndModule();
   } else if (Subtarget->isTargetELF()) {
     // Emit final debug information.
-    DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
-    DW->EndModule();
+    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+      DW->EndModule();
   }
 
   return AsmPrinter::doFinalization(M);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index 5b40e73..68a6bc8 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -27,16 +27,16 @@
 namespace llvm {
 
 class MachineJumpTableInfo;
+class MCInst;
 
 class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
-  DwarfWriter *DW;
   MachineModuleInfo *MMI;
   const X86Subtarget *Subtarget;
  public:
   explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
                             bool V)
-    : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+    : AsmPrinter(O, TM, T, OL, V), MMI(0) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
   }
 
@@ -63,10 +63,62 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   /// machine instruction was sufficiently described to print it, otherwise it
   /// returns false.
   bool printInstruction(const MachineInstr *MI);
+  
+  
+  // New MCInst printing stuff.
+  bool printInstruction(const MCInst *MI);
+
+  void printOperand(const MCInst *MI, unsigned OpNo,
+                    const char *Modifier = 0, bool NotRIPRel = false);
+  void printMemReference(const MCInst *MI, unsigned Op);
+  void printLeaMemReference(const MCInst *MI, unsigned Op);
+  void printSSECC(const MCInst *MI, unsigned Op);
+  void printPICLabel(const MCInst *MI, unsigned Op);
+  void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+  
+  void printi8mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi16mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi32mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi64mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printi128mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf32mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf64mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf80mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printf128mem(const MCInst *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
+  void printlea32mem(const MCInst *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64mem(const MCInst *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+    printLeaMemReference(MI, OpNo);
+  }
+  
+  
 
   // These methods are used by the tablegen'erated instruction printer.
   void printOperand(const MachineInstr *MI, unsigned OpNo,
                     const char *Modifier = 0, bool NotRIPRel = false);
+  void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
@@ -104,7 +156,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
     printLeaMemReference(MI, OpNo, "subreg64");
   }
 
-  bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+  bool printAsmMRegister(const MachineOperand &MO, char Mode);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        unsigned AsmVariant, const char *ExtraCode);
   bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 0000000..9d50edc
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,143 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/MC/MCInst.h"
+#include "X86ATTAsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define MachineInstr MCInst
+#define NO_ASM_WRITER_BOILERPLATE
+#include "X86GenAsmWriter.inc"
+#undef MachineInstr
+
+void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+  switch (MI->getOperand(Op).getImm()) {
+  default: assert(0 && "Invalid ssecc argument!");
+  case 0: O << "eq"; break;
+  case 1: O << "lt"; break;
+  case 2: O << "le"; break;
+  case 3: O << "unord"; break;
+  case 4: O << "neq"; break;
+  case 5: O << "nlt"; break;
+  case 6: O << "nle"; break;
+  case 7: O << "ord"; break;
+  }
+}
+
+
+void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) {
+  assert(0 &&
+         "This is only used for MOVPC32r, should lower before asm printing!");
+}
+
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.  These print slightly differently, for
+/// example, a $ is not emitted.
+void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+  const MCOperand &Op = MI->getOperand(OpNo);
+  
+  if (Op.isImm())
+    O << Op.getImm();
+  else if (Op.isMBBLabel())
+    // FIXME: Keep in sync with printBasicBlockLabel.  printBasicBlockLabel
+    // should eventually call into this code, not the other way around.
+    O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction()
+      << '_' << Op.getMBBLabelBlock();
+  else
+    assert(0 && "Unknown pcrel immediate operand");
+}
+
+
+void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+                                    const char *Modifier, bool NotRIPRel) {
+  assert(Modifier == 0 && "Modifiers should not be used");
+  
+  const MCOperand &Op = MI->getOperand(OpNo);
+  if (Op.isReg()) {
+    O << '%';
+    unsigned Reg = Op.getReg();
+#if 0
+    if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+      MVT VT = (strcmp(Modifier+6,"64") == 0) ?
+      MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+                  ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+      Reg = getX86SubSuperRegister(Reg, VT);
+    }
+#endif
+    O << TRI->getAsmName(Reg);
+    return;
+  } else if (Op.isImm()) {
+    //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+    O << '$';
+    O << Op.getImm();
+    return;
+  }
+  
+  O << "<<UNKNOWN OPERAND KIND>>";
+}
+
+void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
+  bool NotRIPRel = false;
+
+  const MCOperand &BaseReg  = MI->getOperand(Op);
+  const MCOperand &IndexReg = MI->getOperand(Op+2);
+  const MCOperand &DispSpec = MI->getOperand(Op+3);
+  
+  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+  if (DispSpec.isImm()) {
+    int64_t DispVal = DispSpec.getImm();
+    if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+      O << DispVal;
+  } else {
+    abort();
+    //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+    //       DispSpec.isJTI() || DispSpec.isSymbol());
+    //printOperand(MI, Op+3, "mem", NotRIPRel);
+  }
+  
+  if (IndexReg.getReg() || BaseReg.getReg()) {
+    // There are cases where we can end up with ESP/RSP in the indexreg slot.
+    // If this happens, swap the base/index register to support assemblers that
+    // don't work when the index is *SP.
+    // FIXME: REMOVE THIS.
+    assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP);
+    
+    O << '(';
+    if (BaseReg.getReg())
+      printOperand(MI, Op);
+    
+    if (IndexReg.getReg()) {
+      O << ',';
+      printOperand(MI, Op+2);
+      unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+      if (ScaleVal != 1)
+        O << ',' << ScaleVal;
+    }
+    O << ')';
+  }
+}
+
+void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+  const MCOperand &Segment = MI->getOperand(Op+4);
+  if (Segment.getReg()) {
+    printOperand(MI, Op+4);
+    O << ':';
+  }
+  printLeaMemReference(MI, Op);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index c874849..a39203b 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -29,13 +29,11 @@ FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
                                              bool verbose) {
   const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
 
-  if (Subtarget->isFlavorIntel()) {
+  if (Subtarget->isFlavorIntel())
     return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
                                   OptLevel, verbose);
-  } else {
-    return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
-                                OptLevel, verbose);
-  }
+  return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+                              OptLevel, verbose);
 }
 
 namespace {
@@ -48,3 +46,9 @@ namespace {
 
 extern "C" int X86AsmPrinterForceLink;
 int X86AsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+  void InitializeX86AsmPrinter() { }
+}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
index 6599349..ceae7be 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -223,9 +223,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
   case MachineOperand::MO_Immediate:
     O << MO.getImm();
     return;
-  case MachineOperand::MO_MachineBasicBlock:
-    printBasicBlockLabel(MO.getMBB());
-    return;
   case MachineOperand::MO_JumpTableIndex: {
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
     if (!isMemOp) O << "OFFSET ";
@@ -243,14 +240,13 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
     return;
   }
   case MachineOperand::MO_GlobalAddress: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
     GlobalValue *GV = MO.getGlobal();
     std::string Name = Mang->getValueName(GV);
 
     decorateName(Name, GV);
 
-    if (!isMemOp && !isCallOp) O << "OFFSET ";
+    if (!isMemOp) O << "OFFSET ";
     if (GV->hasDLLImportLinkage()) {
       // FIXME: This should be fixed with full support of stdcall & fastcall
       // CC's
@@ -261,8 +257,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
     return;
   }
   case MachineOperand::MO_ExternalSymbol: {
-    bool isCallOp = Modifier && !strcmp(Modifier, "call");
-    if (!isCallOp) O << "OFFSET ";
     O << TAI->getGlobalPrefix() << MO.getSymbolName();
     return;
   }
@@ -271,6 +265,39 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
   }
 }
 
+void X86IntelAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo){
+  const MachineOperand &MO = MI->getOperand(OpNo);
+  switch (MO.getType()) {
+  default: assert(0 && "Unknown pcrel immediate operand");
+  case MachineOperand::MO_Immediate:
+    O << MO.getImm();
+    return;
+  case MachineOperand::MO_MachineBasicBlock:
+    printBasicBlockLabel(MO.getMBB());
+    return;
+    
+  case MachineOperand::MO_GlobalAddress: {
+    GlobalValue *GV = MO.getGlobal();
+    std::string Name = Mang->getValueName(GV);
+    decorateName(Name, GV);
+    
+    if (GV->hasDLLImportLinkage()) {
+      // FIXME: This should be fixed with full support of stdcall & fastcall
+      // CC's
+      O << "__imp_";
+    }
+    O << Name;
+    printOffset(MO.getOffset());
+    return;
+  }
+
+  case MachineOperand::MO_ExternalSymbol:
+    O << TAI->getGlobalPrefix() << MO.getSymbolName();
+    return;
+  }
+}
+
+
 void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
                                               unsigned Op,
                                               const char *Modifier) {
@@ -339,8 +366,8 @@ void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
 }
 
 void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
-  O << "\"L" << getFunctionNumber() << "$pb\"\n";
-  O << "\"L" << getFunctionNumber() << "$pb\":";
+  O << "L" << getFunctionNumber() << "$pb\n";
+  O << "L" << getFunctionNumber() << "$pb:";
 }
 
 bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
@@ -362,7 +389,7 @@ bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
     break;
   }
 
-  O << '%' << TRI->getName(Reg);
+  O << TRI->getName(Reg);
   return false;
 }
 
@@ -414,7 +441,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
 
   Mang->markCharUnacceptable('.');
 
-  O << "\t.686\n\t.model flat\n\n";
+  O << "\t.686\n\t.MMX\n\t.XMM\n\t.model flat\n\n";
 
   // Emit declarations for external functions.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -422,7 +449,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
       std::string Name = Mang->getValueName(I);
       decorateName(Name, I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -436,7 +463,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
     if (I->isDeclaration()) {
       std::string Name = Mang->getValueName(I);
 
-      O << "\textern " ;
+      O << "\tEXTERN " ;
       if (I->hasDLLImportLinkage()) {
         O << "__imp_";
       }
@@ -471,14 +498,14 @@ bool X86IntelAsmPrinter::doFinalization(Module &M) {
     case GlobalValue::WeakAnyLinkage:
     case GlobalValue::WeakODRLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment common 'COMMON'\n";
+      O << name << "?\tSEGEMNT PARA common 'COMMON'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
       break;
     case GlobalValue::AppendingLinkage:
       SwitchToDataSection("");
-      O << name << "?\tsegment public 'DATA'\n";
+      O << name << "?\tSEGMENT PARA public 'DATA'\n";
       bCustomSegment = true;
       // FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
       // are also available.
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 9520d98..04f2595 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -52,6 +52,9 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
       printOp(MO, Modifier);
     }
   }
+  
+  void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
 
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     O << "BYTE PTR ";
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3796aac..4464878 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1858,8 +1858,23 @@ Ideal output:
 	setne	%al
 	ret
 
-We could do this transformation in instcombine, but it's only clearly
-beneficial on platforms with a test instruction.
+This should definitely be done in instcombine, canonicalizing the range
+condition into a != condition.  We get this IR:
+
+define i32 @a(i32 %x) nounwind readnone {
+entry:
+	%0 = and i32 %x, 127		; <i32> [#uses=1]
+	%1 = icmp ugt i32 %0, 31		; <i1> [#uses=1]
+	%2 = zext i1 %1 to i32		; <i32> [#uses=1]
+	ret i32 %2
+}
+
+Instcombine prefers to strength reduce relational comparisons to equality
+comparisons when possible, this should be another case of that.  This could
+be handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
+looks like InstCombiner::visitICmpInstWithInstAndIntCst should really already
+be redesigned to use ComputeMaskedBits and friends.
+
 
 //===---------------------------------------------------------------------===//
 Testcase:
@@ -1880,20 +1895,40 @@ Ideal output:
 
 Testcase:
 int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+int y(int a) { return (a & 0x80) *2; }
 
-Current output:
+Current:
 	testl	$128, 4(%esp)
 	setne	%al
 	movzbl	%al, %eax
 	shll	$8, %eax
 	ret
 
-Ideal output:
+Better:
 	movl	4(%esp), %eax
 	addl	%eax, %eax
 	andl	$256, %eax
 	ret
 
-We generally want to fold shifted tests of a single bit into a shift+and on x86.
+This is another general instcombine transformation that is profitable on all
+targets.  In LLVM IR, these functions look like this:
+
+define i32 @x(i32 %a) nounwind readnone {
+entry:
+	%0 = and i32 %a, 128
+	%1 = icmp eq i32 %0, 0
+	%iftmp.0.0 = select i1 %1, i32 0, i32 256
+	ret i32 %iftmp.0.0
+}
+
+define i32 @y(i32 %a) nounwind readnone {
+entry:
+	%0 = shl i32 %a, 1
+	%1 = and i32 %0, 256
+	ret i32 %1
+}
+
+Replacing an icmp+select with a shift should always be considered profitable in
+instcombine.
 
 //===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 0f2fbcc..ed4eb44 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -991,8 +991,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
   case X86::FpSET_ST0_32:
   case X86::FpSET_ST0_64:
   case X86::FpSET_ST0_80:
-    assert((StackTop == 1 || StackTop == 2)
-           && "Stack should have one or two element on it to return!");
+    // FpSET_ST0_80 is generated by copyRegToReg for both function return
+    // and inline assembly with the "st" constrain. In the latter case,
+    // it is possible for FP0 to be alive after this instruction.
+    if (!MI->killsRegister(X86::FP0)) {
+      // Duplicate ST0
+      duplicateToTop(0, 0, I);
+    }
     --StackTop;   // "Forget" we have something on the top of stack!
     break;
   case X86::FpSET_ST1_32:
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b003efd..9cedafc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -167,6 +167,8 @@ namespace {
                     SDValue &Segment);
     bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp);
+    bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+                       SDValue &Scale, SDValue &Index, SDValue &Disp);
     bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
                              SDValue N, SDValue &Base, SDValue &Scale,
                              SDValue &Index, SDValue &Disp,
@@ -1293,6 +1295,32 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
   return false;
 }
 
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+                                        SDValue &Scale, SDValue &Index,
+                                        SDValue &Disp) {
+  assert(Op.getOpcode() == X86ISD::TLSADDR);
+  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+  
+  X86ISelAddressMode AM;
+  AM.GV = GA->getGlobal();
+  AM.Disp += GA->getOffset();
+  AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
+  
+  if (N.getValueType() == MVT::i32) {
+    AM.Scale = 1;
+    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+  } else {
+    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+  }
+  
+  SDValue Segment;
+  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+  return true;
+}
+
+
 bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 36e3ab2..8d0ea66 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -788,8 +788,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::USUBO, MVT::i64, Custom);
   setOperationAction(ISD::SMULO, MVT::i32, Custom);
   setOperationAction(ISD::SMULO, MVT::i64, Custom);
-  setOperationAction(ISD::UMULO, MVT::i32, Custom);
-  setOperationAction(ISD::UMULO, MVT::i64, Custom);
 
   if (!Subtarget->is64Bit()) {
     // These libcalls are not available in 32-bit.
@@ -4439,9 +4437,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
 
   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
   // exec)
-  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
-                                             GA->getValueType(0),
-                                             GA->getOffset());
+  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+                                           GA->getOffset());
   SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
 
   if (model == TLSModel::InitialExec)
@@ -8474,6 +8471,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       }
     }
     return;
+  case 'K':
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+      if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+        break;
+      }
+    }
+    return;
   case 'N':
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
       if (C->getZExtValue() <= 255) {
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index dc15e4a..063913f 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -19,6 +19,14 @@
 
 // 64-bits but only 32 bits are significant.
 def i64i32imm  : Operand<i64>;
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+  let PrintMethod = "print_pcrel_imm";
+}
+
+
 // 64-bits but only 8 bits are significant.
 def i64i8imm   : Operand<i64>;
 
@@ -29,6 +37,7 @@ def lea64mem : Operand<i64> {
 
 def lea64_32mem : Operand<i32> {
   let PrintMethod = "printlea64_32mem";
+  let AsmOperandLowerMethod = "lower_lea64_32mem";
   let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
 }
 
@@ -39,6 +48,9 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
                         [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
                         []>;
 
+def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
+                               [tglobaltlsaddr], []>;
+
 //===----------------------------------------------------------------------===//
 // Pattern fragments.
 //
@@ -113,9 +125,9 @@ let isCall = 1 in
     // NOTE: this pattern doesn't match "X86call imm", because we do not know
     // that the offset between an arbitrary immediate and the call will fit in
     // the 32-bit pcrel field that we have.
-    def CALL64pcrel32 : I<0xE8, RawFrm,
-                          (outs), (ins i64i32imm:$dst, variable_ops),
-                          "call\t${dst:call}", []>,
+    def CALL64pcrel32 : Ii32<0xE8, RawFrm,
+                          (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+                          "call\t$dst", []>,
                         Requires<[In64BitMode]>;
     def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
                           "call\t{*}$dst", [(X86call GR64:$dst)]>;
@@ -177,6 +189,15 @@ def PUSH64r  : I<0x50, AddRegFrm,
                  (outs), (ins GR64:$reg), "push{q}\t$reg", []>;
 }
 
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), 
+                     "push{q}\t$imm", []>;
+def PUSH64i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
+                      "push{q}\t$imm", []>;
+def PUSH64i32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), 
+                      "push{q}\t$imm", []>;
+}
+
 let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
 def POPFQ    : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W;
 let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
@@ -1312,13 +1333,13 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
     Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
                    ".byte\t0x66; "
-                   "leaq\t${sym:mem}(%rip), %rdi; "
+                   "leaq\t$sym(%rip), %rdi; "
                    ".word\t0x6666; "
                    "rex64; "
                    "call\t__tls_get_addr@PLT",
-                  [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+                  [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;
 
 let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 50ae417..2d8f55f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -163,6 +163,11 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 // X86 Operand Definitions.
 //
 
+def i32imm_pcrel : Operand<i32> {
+  let PrintMethod = "print_pcrel_imm";
+}
+
+
 // *mem - Operand definitions for the funky X86 addressing mode operands.
 //
 class X86MemOperand<string printMethod> : Operand<iPTR> {
@@ -206,8 +211,10 @@ def i16i8imm  : Operand<i16>;
 // 32-bits but only 8 bits are significant.
 def i32i8imm  : Operand<i32>;
 
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT> {
+  let PrintMethod = "print_pcrel_imm";
+}
 
 //===----------------------------------------------------------------------===//
 // X86 Complex Pattern Definitions.
@@ -217,6 +224,8 @@ def brtarget : Operand<OtherVT>;
 def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
 def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
                                [add, sub, mul, shl, or, frameindex], []>;
+def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+                               [tglobaltlsaddr], []>;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Predicate Definitions.
@@ -561,8 +570,9 @@ let isCall = 1 in
               XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
               XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
       Uses = [ESP] in {
-    def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
-                           "call\t${dst:call}", []>;
+    def CALLpcrel32 : Ii32<0xE8, RawFrm,
+                           (outs), (ins i32imm_pcrel:$dst,variable_ops),
+                           "call\t$dst", []>;
     def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
                         "call\t{*}$dst", [(X86call GR32:$dst)]>;
     def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
@@ -587,7 +597,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
 
-  def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call}  # TAILCALL",
+  def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst  # TAILCALL",
                  []>;
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
   def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst  # TAILCALL",
@@ -611,6 +621,15 @@ let mayStore = 1 in
 def PUSH32r  : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
 }
 
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH32i8   : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), 
+                     "push{l}\t$imm", []>;
+def PUSH32i16  : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), 
+                      "push{l}\t$imm", []>;
+def PUSH32i32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), 
+                      "push{l}\t$imm", []>;
+}
+
 let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in
 def POPFD    : I<0x9D, RawFrm, (outs), (ins), "popf", []>;
 let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
@@ -1726,13 +1745,13 @@ let isTwoAddress = 0 in {
 let Defs = [EFLAGS] in {
 let Uses = [CL] in {
 def SHL8rCL  : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "shl{b}\t{%cl, $dst|$dst, %CL}",
+                 "shl{b}\t{%cl, $dst|$dst, CL}",
                  [(set GR8:$dst, (shl GR8:$src, CL))]>;
 def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
-                 "shl{w}\t{%cl, $dst|$dst, %CL}",
+                 "shl{w}\t{%cl, $dst|$dst, CL}",
                  [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
 def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
-                 "shl{l}\t{%cl, $dst|$dst, %CL}",
+                 "shl{l}\t{%cl, $dst|$dst, CL}",
                  [(set GR32:$dst, (shl GR32:$src, CL))]>;
 } // Uses = [CL]
 
@@ -1753,13 +1772,13 @@ def SHL32ri  : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def SHL8mCL  : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
-                   "shl{b}\t{%cl, $dst|$dst, %CL}",
+                   "shl{b}\t{%cl, $dst|$dst, CL}",
                    [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
   def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
-                   "shl{w}\t{%cl, $dst|$dst, %CL}",
+                   "shl{w}\t{%cl, $dst|$dst, CL}",
                    [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
   def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
-                   "shl{l}\t{%cl, $dst|$dst, %CL}",
+                   "shl{l}\t{%cl, $dst|$dst, CL}",
                    [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
   }
   def SHL8mi   : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1788,13 +1807,13 @@ let isTwoAddress = 0 in {
 
 let Uses = [CL] in {
 def SHR8rCL  : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "shr{b}\t{%cl, $dst|$dst, %CL}",
+                 "shr{b}\t{%cl, $dst|$dst, CL}",
                  [(set GR8:$dst, (srl GR8:$src, CL))]>;
 def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
-                 "shr{w}\t{%cl, $dst|$dst, %CL}",
+                 "shr{w}\t{%cl, $dst|$dst, CL}",
                  [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
 def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
-                 "shr{l}\t{%cl, $dst|$dst, %CL}",
+                 "shr{l}\t{%cl, $dst|$dst, CL}",
                  [(set GR32:$dst, (srl GR32:$src, CL))]>;
 }
 
@@ -1822,14 +1841,14 @@ def SHR32r1  : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def SHR8mCL  : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
-                   "shr{b}\t{%cl, $dst|$dst, %CL}",
+                   "shr{b}\t{%cl, $dst|$dst, CL}",
                    [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
   def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
-                   "shr{w}\t{%cl, $dst|$dst, %CL}",
+                   "shr{w}\t{%cl, $dst|$dst, CL}",
                    [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
                    OpSize;
   def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
-                   "shr{l}\t{%cl, $dst|$dst, %CL}",
+                   "shr{l}\t{%cl, $dst|$dst, CL}",
                    [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
   }
   def SHR8mi   : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1857,13 +1876,13 @@ let isTwoAddress = 0 in {
 
 let Uses = [CL] in {
 def SAR8rCL  : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "sar{b}\t{%cl, $dst|$dst, %CL}",
+                 "sar{b}\t{%cl, $dst|$dst, CL}",
                  [(set GR8:$dst, (sra GR8:$src, CL))]>;
 def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
-                 "sar{w}\t{%cl, $dst|$dst, %CL}",
+                 "sar{w}\t{%cl, $dst|$dst, CL}",
                  [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
 def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
-                 "sar{l}\t{%cl, $dst|$dst, %CL}",
+                 "sar{l}\t{%cl, $dst|$dst, CL}",
                  [(set GR32:$dst, (sra GR32:$src, CL))]>;
 }
 
@@ -1892,13 +1911,13 @@ def SAR32r1  : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def SAR8mCL  : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
-                   "sar{b}\t{%cl, $dst|$dst, %CL}",
+                   "sar{b}\t{%cl, $dst|$dst, CL}",
                    [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
   def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
-                   "sar{w}\t{%cl, $dst|$dst, %CL}",
+                   "sar{w}\t{%cl, $dst|$dst, CL}",
                    [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
   def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst), 
-                   "sar{l}\t{%cl, $dst|$dst, %CL}",
+                   "sar{l}\t{%cl, $dst|$dst, CL}",
                    [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
   }
   def SAR8mi   : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1929,13 +1948,13 @@ let isTwoAddress = 0 in {
 // FIXME: provide shorter instructions when imm8 == 1
 let Uses = [CL] in {
 def ROL8rCL  : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "rol{b}\t{%cl, $dst|$dst, %CL}",
+                 "rol{b}\t{%cl, $dst|$dst, CL}",
                  [(set GR8:$dst, (rotl GR8:$src, CL))]>;
 def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
-                 "rol{w}\t{%cl, $dst|$dst, %CL}",
+                 "rol{w}\t{%cl, $dst|$dst, CL}",
                  [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
 def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
-                 "rol{l}\t{%cl, $dst|$dst, %CL}",
+                 "rol{l}\t{%cl, $dst|$dst, CL}",
                  [(set GR32:$dst, (rotl GR32:$src, CL))]>;
 }
 
@@ -1963,13 +1982,13 @@ def ROL32r1  : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def ROL8mCL  : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
-                   "rol{b}\t{%cl, $dst|$dst, %CL}",
+                   "rol{b}\t{%cl, $dst|$dst, CL}",
                    [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
   def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
-                   "rol{w}\t{%cl, $dst|$dst, %CL}",
+                   "rol{w}\t{%cl, $dst|$dst, CL}",
                    [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
   def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
-                   "rol{l}\t{%cl, $dst|$dst, %CL}",
+                   "rol{l}\t{%cl, $dst|$dst, CL}",
                    [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
   }
   def ROL8mi   : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1998,13 +2017,13 @@ let isTwoAddress = 0 in {
 
 let Uses = [CL] in {
 def ROR8rCL  : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
-                 "ror{b}\t{%cl, $dst|$dst, %CL}",
+                 "ror{b}\t{%cl, $dst|$dst, CL}",
                  [(set GR8:$dst, (rotr GR8:$src, CL))]>;
 def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
-                 "ror{w}\t{%cl, $dst|$dst, %CL}",
+                 "ror{w}\t{%cl, $dst|$dst, CL}",
                  [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
 def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
-                 "ror{l}\t{%cl, $dst|$dst, %CL}",
+                 "ror{l}\t{%cl, $dst|$dst, CL}",
                  [(set GR32:$dst, (rotr GR32:$src, CL))]>;
 }
 
@@ -2032,13 +2051,13 @@ def ROR32r1  : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def ROR8mCL  : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
-                   "ror{b}\t{%cl, $dst|$dst, %CL}",
+                   "ror{b}\t{%cl, $dst|$dst, CL}",
                    [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
   def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
-                   "ror{w}\t{%cl, $dst|$dst, %CL}",
+                   "ror{w}\t{%cl, $dst|$dst, CL}",
                    [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
   def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst), 
-                   "ror{l}\t{%cl, $dst|$dst, %CL}",
+                   "ror{l}\t{%cl, $dst|$dst, CL}",
                    [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
   }
   def ROR8mi   : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -2070,17 +2089,17 @@ let isTwoAddress = 0 in {
 // Double shift instructions (generalizations of rotate)
 let Uses = [CL] in {
 def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                    [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
 def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
-                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                    [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
 def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                    [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
                    TB, OpSize;
 def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
-                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                    [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
                    TB, OpSize;
 }
@@ -2115,11 +2134,11 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
 let isTwoAddress = 0 in {
   let Uses = [CL] in {
   def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                     "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                     "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                      [(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
                        addr:$dst)]>, TB;
   def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                     [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
                       addr:$dst)]>, TB;
   }
@@ -2138,11 +2157,11 @@ let isTwoAddress = 0 in {
 
   let Uses = [CL] in {
   def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                     "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                     "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                      [(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
                        addr:$dst)]>, TB, OpSize;
   def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+                    "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
                     [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
                       addr:$dst)]>, TB, OpSize;
   }
@@ -3095,11 +3114,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
-    Uses = [ESP, EBX] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym),
-                  "leal\t${sym:mem}(,%ebx,1), %eax; "
+    Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+                  "leal\t$sym, %eax; "
                   "call\t___tls_get_addr@PLT",
-                  [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+                  [(X86tlsaddr tls32addr:$sym)]>,
                   Requires<[In32BitMode]>;
 
 let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b44c7a6..5d6ef36 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
           (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
 }
 
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+          (SHUFPSrri VR128:$src2, VR128:$src1, 
+                     (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+
 // Set lowest element and zero upper elements.
 let AddedComplexity = 15 in
 def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 6c0074e..a2f319f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -662,6 +662,10 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
      TargetFrameInfo::StackGrowsUp ?
      TD->getPointerSize() : -TD->getPointerSize());
 
+  MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+  MachineLocation FPSrc(MachineLocation::VirtualFP);
+  Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+
   if (StackSize) {
     // Show update of SP.
     if (hasFP(MF)) {
@@ -676,7 +680,7 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
       Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
     }
   } else {
-    //FIXME: Verify & implement for FP
+    // FIXME: Verify & implement for FP
     MachineLocation SPDst(StackPtr);
     MachineLocation SPSrc(StackPtr, stackGrowth);
     Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
@@ -711,10 +715,6 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
     MachineLocation FPSrc(FramePtr);
     Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
   }
-
-  MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
-  MachineLocation FPSrc(MachineLocation::VirtualFP);
-  Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
 }
 
 
@@ -729,8 +729,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
                           !Fn->doesNotThrow() ||
                           UnwindTablesMandatory;
-  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
-                 DebugLoc::getUnknownLoc());
+  DebugLoc DL;
 
   // Prepare for frame info.
   unsigned FrameLabelId = 0;
@@ -822,13 +821,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
-  unsigned ReadyLabelId = 0;
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer is ready.
-    ReadyLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
-  }
-
   // Skip the callee-saved push instructions.
   while (MBBI != MBB.end() &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -891,8 +883,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
       emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
   }
 
-  if (needsFrameMoves)
+  if (needsFrameMoves) {
+    unsigned ReadyLabelId = 0;
+    // Mark effective beginning of when frame pointer is ready.
+    ReadyLabelId = MMI->NextLabelID();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
     emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+  }
 }
 
 void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 46476f2..694b0eb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -173,7 +173,7 @@ public:
   bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
   bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
-  bool isPICStyleWinPIC() const { return PICStyle == PICStyles:: WinPIC; }
+  bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
   
   /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
   unsigned getDarwinVers() const { return DarwinVers; }
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
index 5dda5f4..f49ca15 100644
--- a/lib/Target/X86/X86TargetAsmInfo.cpp
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -44,40 +44,25 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
 
   AlignmentIsInBytes = false;
   TextAlignFillValue = 0x90;
-  GlobalPrefix = "_";
+    
+    
   if (!is64Bit)
     Data64bitsDirective = 0;       // we can't emit a 64-bit unit
   ZeroDirective = "\t.space\t";  // ".space N" emits N zeros.
-  PrivateGlobalPrefix = "L";     // Marker for constant pool idxs
-  LessPrivateGlobalPrefix = "l";  // Marker for some ObjC metadata
-  BSSSection = 0;                       // no BSS section.
   ZeroFillDirective = "\t.zerofill\t";  // Uses .zerofill
   if (TM.getRelocationModel() != Reloc::Static)
     ConstantPoolSection = "\t.const_data";
   else
     ConstantPoolSection = "\t.const\n";
-  JumpTableDataSection = "\t.const\n";
-  CStringSection = "\t.cstring";
-  // FIXME: Why don't always use this section?
-  if (is64Bit) {
+  // FIXME: Why don't we always use this section?
+  if (is64Bit)
     SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
                                                    SectionFlags::Mergeable);
-  }
   LCOMMDirective = "\t.lcomm\t";
-  SwitchToSectionDirective = "\t.section ";
-  StringConstantPrefix = "\1LC";
   // Leopard and above support aligned common symbols.
   COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9);
   HasDotTypeDotSizeDirective = false;
-  HasSingleParameterDotFile = false;
   NonLocalEHFrameLabel = true;
-  if (TM.getRelocationModel() == Reloc::Static) {
-    StaticCtorsSection = ".constructor";
-    StaticDtorsSection = ".destructor";
-  } else {
-    StaticCtorsSection = ".mod_init_func";
-    StaticDtorsSection = ".mod_term_func";
-  }
   if (is64Bit) {
     PersonalityPrefix = "";
     PersonalitySuffix = "+4@GOTPCREL";
@@ -85,40 +70,18 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
     PersonalityPrefix = "L";
     PersonalitySuffix = "$non_lazy_ptr";
   }
-  NeedsIndirectEncoding = true;
   InlineAsmStart = "## InlineAsm Start";
   InlineAsmEnd = "## InlineAsm End";
   CommentString = "##";
   SetDirective = "\t.set";
   PCSymbol = ".";
   UsedDirective = "\t.no_dead_strip\t";
-  WeakDefDirective = "\t.weak_definition ";
-  WeakRefDirective = "\t.weak_reference ";
-  HiddenDirective = "\t.private_extern ";
   ProtectedDirective = "\t.globl\t";
 
-  // In non-PIC modes, emit a special label before jump tables so that the
-  // linker can perform more accurate dead code stripping.
-  if (TM.getRelocationModel() != Reloc::PIC_) {
-    // Emit a local label that is preserved until the linker runs.
-    JumpTableSpecialLabelPrefix = "l";
-  }
-
   SupportsDebugInformation = true;
-  NeedsSet = true;
-  DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
-  DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
-  DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
-  DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
-  DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
-  DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+
   DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug";
   DwarfUsesInlineInfoSection = true;
-  DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
-  DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
-  DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
-  DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
-  DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
 
   // Exceptions handling
   SupportsExceptionHandling = true;
@@ -176,7 +139,7 @@ X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM):
   DwarfLocSection =     "\t.section\t.debug_loc,\"\",@progbits";
   DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
   DwarfRangesSection =  "\t.section\t.debug_ranges,\"\",@progbits";
-  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+  DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
 
   // Exceptions handling
   SupportsExceptionHandling = true;
@@ -259,7 +222,7 @@ X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
   DwarfLocSection =     "\t.section\t.debug_loc,\"dr\"";
   DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
   DwarfRangesSection =  "\t.section\t.debug_ranges,\"dr\"";
-  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+  DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
 }
 
 unsigned
@@ -340,8 +303,11 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
   GlobalPrefix = "_";
   CommentString = ";";
 
+  InlineAsmStart = "; InlineAsm Start";
+  InlineAsmEnd   = "; InlineAsm End";
+
   PrivateGlobalPrefix = "$";
-  AlignDirective = "\talign\t";
+  AlignDirective = "\tALIGN\t";
   ZeroDirective = "\tdb\t";
   ZeroDirectiveSuffix = " dup(0)";
   AsciiDirective = "\tdb\t";
@@ -353,13 +319,15 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
   HasDotTypeDotSizeDirective = false;
   HasSingleParameterDotFile = false;
 
+  AlignmentIsInBytes = true;
+
   TextSection = getUnnamedSection("_text", SectionFlags::Code);
   DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
 
   JumpTableDataSection = NULL;
   SwitchToSectionDirective = "";
-  TextSectionStartSuffix = "\tsegment 'CODE'";
-  DataSectionStartSuffix = "\tsegment 'DATA'";
+  TextSectionStartSuffix = "\tSEGMENT PARA 'CODE'";
+  DataSectionStartSuffix = "\tSEGMENT PARA 'DATA'";
   SectionEndDirectiveSuffix = "\tends\n";
 }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index dfb055f..53c46c3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -36,6 +36,11 @@ X("x86",    "32-bit X86: Pentium-Pro and above");
 static RegisterTarget<X86_64TargetMachine>
 Y("x86-64", "64-bit X86: EM64T and AMD64");
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeX86Target() { }
+}
+
 // No assembler printer by default
 X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index c9a6d8a..ed4c101 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -428,6 +428,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
 
 bool XCoreAsmPrinter::doInitialization(Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
+  DW = getAnalysisIfAvailable<DwarfWriter>();
   
   if (!FileDirective.empty()) {
     emitFileDirective(FileDirective);
@@ -449,11 +450,6 @@ bool XCoreAsmPrinter::doInitialization(Module &M) {
     }
   }
 
-  // Emit initial debug information.
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  assert(DW && "Dwarf Writer is not available");
-  DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(),
-                  O, this, TAI);
   return Result;
 }
 
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
index 5513762..59ad624 100644
--- a/lib/Target/XCore/XCoreTargetAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
 XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
   : ELFTargetAsmInfo(TM),
     Subtarget(TM.getSubtargetImpl()) {
+  SupportsDebugInformation = true;
   TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
   DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
                                 SectionFlags::Small);
@@ -64,7 +65,7 @@ XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
   DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
   DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
   DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
-  DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+  DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
 }
 
 const Section*
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 5437c57..cfd3cd3 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,6 +31,11 @@ namespace {
   RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
 }
 
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+  void InitializeXCoreTarget() { }
+}
+
 const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
   return new XCoreTargetAsmInfo(*this);
 }
author	ed <ed@FreeBSD.org>	2009-06-22 08:08:12 +0000
committer	ed <ed@FreeBSD.org>	2009-06-22 08:08:12 +0000
commit	a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe (patch)
tree	86c1bc482baa6c81fc70b8d715153bfa93377186 /lib/Target
parent	db89e312d968c258aba3c79c1c398f5fb19267a3 (diff)
download	FreeBSD-src-a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe.zip FreeBSD-src-a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe.tar.gz