17 files changed, 1159 insertions, 474 deletions
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index aae5da8..ee29275 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -7,21 +7,32 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This is a simple local pass that fills delay slots with NOPs.
-//
+// This is a simple local pass that attempts to fill delay slots with useful
+// instructions. If no instructions can be moved into the delay slot, then a
+// NOP is placed.
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "delayslotfiller"
+#define DEBUG_TYPE "delay-slot-filler"
 #include "Sparc.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+
 using namespace llvm;
 
 STATISTIC(FilledSlots, "Number of delay slots filled");
 
+static cl::opt<bool> DisableDelaySlotFiller(
+  "disable-sparc-delay-filler",
+  cl::init(false),
+  cl::desc("Disable the Sparc delay slot filler."),
+  cl::Hidden);  // when set, every delay slot simply receives a NOP
+
 namespace {
   struct Filler : public MachineFunctionPass {
     /// Target machine description which we query for reg. names, data
@@ -47,6 +58,28 @@ namespace {
       return Changed;
     }
 
+    bool isDelayFiller(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator candidate);
+
+    void insertCallUses(MachineBasicBlock::iterator MI,
+                        SmallSet<unsigned, 32>& RegUses);
+
+    void insertDefsUses(MachineBasicBlock::iterator MI,
+                        SmallSet<unsigned, 32>& RegDefs,
+                        SmallSet<unsigned, 32>& RegUses);
+
+    bool IsRegInSet(SmallSet<unsigned, 32>& RegSet,
+                    unsigned Reg);
+
+    bool delayHasHazard(MachineBasicBlock::iterator candidate,
+                        bool &sawLoad, bool &sawStore,
+                        SmallSet<unsigned, 32> &RegDefs,
+                        SmallSet<unsigned, 32> &RegUses);
+
+    MachineBasicBlock::iterator
+    findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+
+
   };
   char Filler::ID = 0;
 } // end of anonymous namespace
@@ -59,18 +92,201 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
 }
 
 /// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
-/// Currently, we fill delay slots with NOPs. We assume there is only one
-/// delay slot per delayed instruction.
+/// We assume there is only one delay slot per delayed instruction.
 ///
 bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
+
   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
     if (I->getDesc().hasDelaySlot()) {
+      MachineBasicBlock::iterator D = MBB.end();
       MachineBasicBlock::iterator J = I;
-      ++J;
-      BuildMI(MBB, J, DebugLoc(), TII->get(SP::NOP));
+      // D stays at MBB.end() unless findDelayInstr picks a filler.
+      if (!DisableDelaySlotFiller)
+        D = findDelayInstr(MBB, I);
+      // The statistic counts every slot, including NOP-filled ones.
       ++FilledSlots;
       Changed = true;
+      // Put a NOP, or the chosen instruction, directly after I.
+      if (D == MBB.end())
+        BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
+      else
+        MBB.splice(++J, &MBB, D);
     }
   return Changed;
 }
+
+/// findDelayInstr - Walk backwards from the delayed instruction 'slot' looking
+/// for an instruction in MBB that may be moved into its delay slot. Returns
+/// MBB.end() when there is none, in which case the caller emits a NOP.
+MachineBasicBlock::iterator
+Filler::findDelayInstr(MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator slot)
+{
+  SmallSet<unsigned, 32> RegDefs;
+  SmallSet<unsigned, 32> RegUses;
+  bool sawLoad = false;
+  bool sawStore = false;
+  MachineBasicBlock::iterator I = slot;
+
+  if (slot->getOpcode() == SP::RET)
+    return MBB.end();
+
+  if (slot->getOpcode() == SP::RETL) {
+    // A retl that starts the block has no preceding instruction to inspect.
+    if (I == MBB.begin() || (--I)->getOpcode() != SP::RESTORErr)
+      return MBB.end();
+    // The restore will sit in the delay slot, so retl is changed to ret.
+    slot->setDesc(TII->get(SP::RET));
+    return I;
+  }
+
+  // A call's delay filler can def some of the call's uses.
+  if (slot->getDesc().isCall())
+    insertCallUses(slot, RegUses);
+  else
+    insertDefsUses(slot, RegDefs, RegUses);
+
+  bool done = false;
+  while (!done) {
+    // Once I is the block's first instruction this is the last iteration.
+    done = (I == MBB.begin());
+    if (!done)
+      --I;
+    // skip debug value
+    if (I->isDebugValue())
+      continue;
+
+    // Stop the search at anything instructions must not be moved across.
+    if (I->hasUnmodeledSideEffects()
+        || I->isInlineAsm()
+        || I->isLabel()
+        || I->getDesc().hasDelaySlot()
+        || isDelayFiller(MBB, I))
+      break;
+
+    // Not usable: record its defs/uses so earlier candidates respect them.
+    if (delayHasHazard(I, sawLoad, sawStore, RegDefs, RegUses)) {
+      insertDefsUses(I, RegDefs, RegUses);
+      continue;
+    }
+    return I;
+  }
+  return MBB.end();
+}
+
+/// delayHasHazard - Return true when 'candidate' cannot be moved into the
+/// delay slot because of a memory or register conflict with the instructions
+/// recorded in sawLoad/sawStore/RegDefs/RegUses. The two flags are updated to
+/// account for the candidate itself.
+bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
+                            bool &sawLoad,
+                            bool &sawStore,
+                            SmallSet<unsigned, 32> &RegDefs,
+                            SmallSet<unsigned, 32> &RegUses)
+{
+  if (candidate->isImplicitDef() || candidate->isKill())
+    return true;
+
+  // A load conflicts with a store that has already been seen.
+  if (candidate->getDesc().mayLoad()) {
+    sawLoad = true;
+    if (sawStore)
+      return true;
+  }
+
+  // A store conflicts with any load or store that has already been seen.
+  if (candidate->getDesc().mayStore()) {
+    const bool Conflict = sawStore || sawLoad;
+    sawStore = true;
+    if (Conflict)
+      return true;
+  }
+
+  const unsigned NumOps = candidate->getNumOperands();
+  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+    const MachineOperand &Op = candidate->getOperand(OpIdx);
+    if (!Op.isReg())
+      continue;
+    const unsigned R = Op.getReg();
+
+    // A def clashes with an already recorded def or use of the register.
+    if (Op.isDef() && (IsRegInSet(RegDefs, R) || IsRegInSet(RegUses, R)))
+      return true;
+    // A use clashes with an already recorded def of the register.
+    if (Op.isUse() && IsRegInSet(RegDefs, R))
+      return true;
+  }
+  return false;
+}
+
+
+/// insertCallUses - Add the register operands (operands 0 and 1) of a
+/// JMPLrr/JMPLri call to RegUses. SP::CALL contributes nothing.
+void Filler::insertCallUses(MachineBasicBlock::iterator MI,
+                            SmallSet<unsigned, 32>& RegUses)
+{
+  const unsigned Opc = MI->getOpcode();
+  if (Opc == SP::CALL)
+    return;
+  if (Opc != SP::JMPLrr && Opc != SP::JMPLri)
+    llvm_unreachable("Unknown opcode.");
+
+  assert(MI->getNumOperands() >= 2);
+  const MachineOperand &First = MI->getOperand(0);
+  assert(First.isReg() && "JMPL first operand is not a register.");
+  assert(First.isUse() && "JMPL first operand is not a use.");
+  RegUses.insert(First.getReg());
+
+  const MachineOperand &Second = MI->getOperand(1);
+  if (Second.isImm())
+    return;
+  assert(Second.isReg() && "JMPLrr second operand is not a register.");
+  assert(Second.isUse() && "JMPLrr second operand is not a use.");
+  RegUses.insert(Second.getReg());
+}
+
+/// insertDefsUses - Record every register operand of MI: definitions go into
+/// RegDefs and uses into RegUses. Operands naming register 0 are ignored.
+void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
+                            SmallSet<unsigned, 32>& RegDefs,
+                            SmallSet<unsigned, 32>& RegUses)
+{
+  const unsigned NumOps = MI->getNumOperands();
+  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
+    const MachineOperand &Op = MI->getOperand(OpIdx);
+    // Only operands that name a non-zero register are of interest.
+    if (!Op.isReg() || Op.getReg() == 0)
+      continue;
+
+    const unsigned R = Op.getReg();
+    if (Op.isDef())
+      RegDefs.insert(R);
+    if (Op.isUse())
+      RegUses.insert(R);
+  }
+}
+
+/// IsRegInSet - Return true if Reg, or any register in its alias set, is a
+/// member of RegSet.
+bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
+{
+  if (RegSet.count(Reg))
+    return true;
+  // Walk the zero-terminated alias list of Reg.
+  const unsigned *AliasPtr = TM.getRegisterInfo()->getAliasSet(Reg);
+  while (*AliasPtr)
+    if (RegSet.count(*AliasPtr++))
+      return true;
+  return false;
+}
+
+/// isDelayFiller - Return true if the instruction preceding 'candidate' in MBB
+/// has a delay slot, i.e. 'candidate' already occupies that slot.
+bool Filler::isDelayFiller(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator candidate)
+{
+  if (candidate == MBB.begin())
+    return false;
+  return (--candidate)->getDesc().hasDelaySlot();
+}
diff --git a/contrib/llvm/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index ab948bb..edde842 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -145,6 +145,8 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
   case MachineOperand::MO_Register:
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            "Operand is not a physical register ");
+    assert(MO.getReg() != SP::O7 && 
+           "%o7 is assigned as destination for getpcx!");
     operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
     break;
   }
@@ -156,8 +158,8 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
   O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
 
   O << "\t  sethi\t"
-    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "  
-    << operand << '\n' ;
+    << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum 
+    << ")), "  << operand << '\n' ;
 
   O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
   O << "\tor\t" << operand  
diff --git a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
index 33ecfdf..856f87a 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -24,9 +24,13 @@ def RetCC_Sparc32 : CallingConv<[
 
 // Sparc 32-bit C Calling convention.
 def CC_Sparc32 : CallingConv<[
-  // All arguments get passed in integer registers if there is space.
-  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
-  
+  //Custom assign SRet to [sp+64].
+  CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
+  // i32 and f32 arguments get passed in integer registers if there is space.
+  CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  // f64 arguments are split and passed through registers or through stack.
+  CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>,
+
   // Alternatively, they are assigned to the stack in 4-byte aligned units.
   CCAssignToStack<4, 4>
 ]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
new file mode 100644
index 0000000..320c8ca
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -0,0 +1,80 @@
+//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Sparc implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
+  MachineBasicBlock &MBB = MF.front();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const SparcInstrInfo &TII =
+    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Get the number of bytes to allocate from the FrameInfo
+  int NumBytes = (int) MFI->getStackSize();
+
+  // Emit the correct save instruction based on the number of bytes in
+  // the frame. Minimum stack frame size according to V8 ABI is:
+  //   16 words for register window spill
+  //    1 word for address of returned aggregate-value
+  // +  6 words for passing parameters on the stack
+  // ----------
+  //   23 words * 4 bytes per word = 92 bytes
+  NumBytes += 92;
+
+  // Round up to next doubleword boundary -- a double-word boundary
+  // is required by the ABI.
+  NumBytes = (NumBytes + 7) & ~7;
+  NumBytes = -NumBytes;
+  // SAVEri carries the offset as an immediate; it is used down to -4096.
+  if (NumBytes >= -4096) {
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
+      .addReg(SP::O6).addImm(NumBytes);
+  } else {
+    // Emit this the hard way.  This clobbers G1 which we always know is
+    // available here.
+    unsigned OffHi = (unsigned)NumBytes >> 10U;
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
+    // OR the low 10 bits of the offset into G1 (SETHIi was given the rest).
+    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
+      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
+    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
+      .addReg(SP::O6).addReg(SP::G1);
+  }
+}
+
+/// emitEpilogue - Place a RESTORErr on G0 right before the block's 'retl'.
+void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
+                                      MachineBasicBlock &MBB) const {
+  const SparcInstrInfo &SPII =
+    *static_cast<const SparcInstrInfo*>(MF.getTarget().getInstrInfo());
+  MachineBasicBlock::iterator RetPos = MBB.getLastNonDebugInstr();
+  assert(RetPos->getOpcode() == SP::RETL &&
+         "Can only put epilog before 'retl' instruction!");
+  BuildMI(MBB, RetPos, RetPos->getDebugLoc(), SPII.get(SP::RESTORErr), SP::G0)
+    .addReg(SP::G0).addReg(SP::G0);
+}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
new file mode 100644
index 0000000..9a2ddc8
--- /dev/null
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h
@@ -0,0 +1,41 @@
+//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Sparc implementation of TargetFrameLowering.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARC_FRAMEINFO_H
+#define SPARC_FRAMEINFO_H
+
+#include "Sparc.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class SparcSubtarget;
+
+class SparcFrameLowering : public TargetFrameLowering {
+  const SparcSubtarget &STI;  // held by reference; bound in the constructor
+public:
+  explicit SparcFrameLowering(const SparcSubtarget &sti)
+    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, 0), STI(sti) {
+  }
+
+  /// emitPrologue/emitEpilogue - These methods insert prolog and epilog code
+  /// into the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  /// hasFP - Always returns false for this target.
+  bool hasFP(const MachineFunction &MF) const { return false; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 4ea94c4..8c6103d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -44,9 +44,8 @@ public:
   SDNode *Select(SDNode *N);
 
   // Complex Pattern Selectors.
-  bool SelectADDRrr(SDNode *Op, SDValue N, SDValue &R1, SDValue &R2);
-  bool SelectADDRri(SDNode *Op, SDValue N, SDValue &Base,
-                    SDValue &Offset);
+  bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
+  bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset);
 
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
@@ -71,7 +70,7 @@ SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
   return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
 }
 
-bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
+bool SparcDAGToDAGISel::SelectADDRri(SDValue Addr,
                                      SDValue &Base, SDValue &Offset) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -112,8 +111,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
   return true;
 }
 
-bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
-                                     SDValue &R1,  SDValue &R2) {
+bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) {
   if (Addr.getOpcode() == ISD::FrameIndex) return false;
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
       Addr.getOpcode() == ISD::TargetGlobalAddress)
@@ -160,7 +158,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
     } else {
       TopPart = CurDAG->getRegister(SP::G0, MVT::i32);
     }
-    TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Flag, TopPart,
+    TopPart = SDValue(CurDAG->getMachineNode(SP::WRYrr, dl, MVT::Glue, TopPart,
                                      CurDAG->getRegister(SP::G0, MVT::i32)), 0);
 
     // FIXME: Handle div by immediate.
@@ -174,7 +172,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
     SDValue MulLHS = N->getOperand(0);
     SDValue MulRHS = N->getOperand(1);
     unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
-    SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Flag,
+    SDNode *Mul = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
                                          MulLHS, MulRHS);
     // The high part is in the Y register.
     return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
@@ -196,8 +194,8 @@ SparcDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
   switch (ConstraintCode) {
   default: return true;
   case 'm':   // memory
-   if (!SelectADDRrr(Op.getNode(), Op, Op0, Op1))
-     SelectADDRri(Op.getNode(), Op, Op0, Op1);
+   if (!SelectADDRrr(Op, Op0, Op1))
+     SelectADDRri(Op, Op0, Op1);
    break;
   }
 
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 4099a62..196b87d 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,3 +1,4 @@
+
 //===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -32,6 +33,47 @@ using namespace llvm;
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
+/// CC_Sparc_Assign_SRet - Custom handler for the sret argument: it is given a
+/// custom memory location at offset 0. Always returns true.
+static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
+                                 MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                 ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+  assert (ArgFlags.isSRet());
+  const CCValAssign SRetLoc =
+    CCValAssign::getCustomMem(ValNo, ValVT, 0, LocVT, LocInfo);
+  State.addLoc(SRetLoc);
+  return true;
+}
+
+/// CC_Sparc_Assign_f64 - Custom handler that splits an f64 argument in two.
+/// Each half takes a register from I0-I5 if available; with no register for
+/// the first half the whole value gets one 8-byte stack slot. Returns true.
+static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
+                                MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+                                ISD::ArgFlagsTy &ArgFlags, CCState &State)
+{
+  static const unsigned RegList[] = {
+    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+  };
+  // First half: without a register the entire value lives on the stack.
+  const unsigned R1 = State.AllocateReg(RegList, 6);
+  if (!R1) {
+    const unsigned Off = State.AllocateStack(8, 4);
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Off, LocVT, LocInfo));
+    return true;
+  }
+  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, R1, LocVT, LocInfo));
+  // Second half: a register if one is left, otherwise a 4-byte stack slot.
+  if (const unsigned R2 = State.AllocateReg(RegList, 6)) {
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, R2, LocVT, LocInfo));
+    return true;
+  }
+  const unsigned Off = State.AllocateStack(4, 4);
+  State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Off, LocVT, LocInfo));
+  return true;
+}
+
 #include "SparcGenCallingConv.inc"
 
 SDValue
@@ -41,6 +83,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  DebugLoc dl, SelectionDAG &DAG) const {
 
+  MachineFunction &MF = DAG.getMachineFunction();
+
   // CCValAssign - represent the assignment of the return value to locations.
   SmallVector<CCValAssign, 16> RVLocs;
 
@@ -53,10 +97,10 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
 
   // If this is the first return lowered for this function, add the regs to the
   // liveout set for the function.
-  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+  if (MF.getRegInfo().liveout_empty()) {
     for (unsigned i = 0; i != RVLocs.size(); ++i)
       if (RVLocs[i].isRegLoc())
-        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+        MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
   }
 
   SDValue Flag;
@@ -66,12 +110,24 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
 
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                              OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together with flags.
     Flag = Chain.getValue(1);
   }
+  // If the function returns a struct, copy the SRetReturnReg to I0
+  if (MF.getFunction()->hasStructRetAttr()) {
+    SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+    unsigned Reg = SFI->getSRetReturnReg();
+    if (!Reg)
+      llvm_unreachable("sret virtual register not created in the entry block");
+    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
+    Chain = DAG.getCopyToReg(Chain, dl, SP::I0, Val, Flag);
+    Flag = Chain.getValue(1);
+    if (MF.getRegInfo().liveout_empty())
+      MF.getRegInfo().addLiveOut(SP::I0);
+  }
 
   if (Flag.getNode())
     return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
@@ -100,135 +156,159 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                  ArgLocs, *DAG.getContext());
   CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
 
-  static const unsigned ArgRegs[] = {
-    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
-  };
-  const unsigned *CurArgReg = ArgRegs, *ArgRegEnd = ArgRegs+6;
-  unsigned ArgOffset = 68;
+  const unsigned StackOffset = 92;
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    SDValue ArgValue;
     CCValAssign &VA = ArgLocs[i];
-    // FIXME: We ignore the register assignments of AnalyzeFormalArguments
-    // because it doesn't know how to split a double into two i32 registers.
-    EVT ObjectVT = VA.getValVT();
-    switch (ObjectVT.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Unhandled argument type!");
-    case MVT::i1:
-    case MVT::i8:
-    case MVT::i16:
-    case MVT::i32:
-      if (!Ins[i].Used) {                  // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-        unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-        MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
-        SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-        if (ObjectVT != MVT::i32) {
-          unsigned AssertOp = ISD::AssertSext;
-          Arg = DAG.getNode(AssertOp, dl, MVT::i32, Arg,
-                            DAG.getValueType(ObjectVT));
-          Arg = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Arg);
-        }
-        InVals.push_back(Arg);
-      } else {
-        int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                            true);
-        SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-        SDValue Load;
-        if (ObjectVT == MVT::i32) {
-          Load = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
-                             false, false, 0);
-        } else {
-          ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
-
-          // Sparc is big endian, so add an offset based on the ObjectVT.
-          unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
-          FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
-                              DAG.getConstant(Offset, MVT::i32));
-          Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
-                                NULL, 0, ObjectVT, false, false, 0);
-          Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
-        }
-        InVals.push_back(Load);
-      }
 
-      ArgOffset += 4;
-      break;
-    case MVT::f32:
-      if (!Ins[i].Used) {                  // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-        // FP value is passed in an integer register.
-        unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-        MF.getRegInfo().addLiveIn(*CurArgReg++, VReg);
-        SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
-
-        Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
-        InVals.push_back(Arg);
-      } else {
-        int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                            true);
-        SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-        SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
-                                   false, false, 0);
-        InVals.push_back(Load);
-      }
-      ArgOffset += 4;
-      break;
+    if (i == 0  && Ins[i].Flags.isSRet()) {
+      //Get SRet from [%fp+64]
+      int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+      SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
+      SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                                MachinePointerInfo(),
+                                false, false, 0);
+      InVals.push_back(Arg);
+      continue;
+    }
 
-    case MVT::i64:
-    case MVT::f64:
-      if (!Ins[i].Used) {                // Argument is dead.
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        if (CurArgReg < ArgRegEnd) ++CurArgReg;
-        InVals.push_back(DAG.getUNDEF(ObjectVT));
-      } else {
-        SDValue HiVal;
-        if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-          unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-          MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
-          HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
-        } else {
-          int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
-                                                              true);
-          SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
-                              false, false, 0);
-        }
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+
+      if (VA.needsCustom()) {
+        assert(VA.getLocVT() == MVT::f64);
+        unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+        MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
+        SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
+
+        assert(i+1 < e);
+        CCValAssign &NextVA = ArgLocs[++i];
 
         SDValue LoVal;
-        if (CurArgReg < ArgRegEnd) {  // Lives in an incoming GPR
-          unsigned VRegLo = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
-          MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
-          LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
-        } else {
-          int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
-                                                              true);
+        if (NextVA.isMemLoc()) {
+          int FrameIdx = MF.getFrameInfo()->
+            CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
           SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
-          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
+          LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                              MachinePointerInfo(),
                               false, false, 0);
+        } else {
+          unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
+                                        &SP::IntRegsRegClass, dl);
+          LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
         }
-
-        // Compose the two halves together into an i64 unit.
         SDValue WholeValue =
           DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+        WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+        InVals.push_back(WholeValue);
+        continue;
+      }
+      unsigned VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
+      MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
+      SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+      if (VA.getLocVT() == MVT::f32)
+        Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
+      else if (VA.getLocVT() != MVT::i32) {
+        Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
+                          DAG.getValueType(VA.getLocVT()));
+        Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
+      }
+      InVals.push_back(Arg);
+      continue;
+    }
 
-        // If we want a double, do a bit convert.
-        if (ObjectVT == MVT::f64)
-          WholeValue = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, WholeValue);
+    assert(VA.isMemLoc());
 
-        InVals.push_back(WholeValue);
+    unsigned Offset = VA.getLocMemOffset()+StackOffset;
+
+    if (VA.needsCustom()) {
+      assert(VA.getValVT() == MVT::f64);
+      //If it is double-word aligned, just load.
+      if (Offset % 8 == 0) {
+        int FI = MF.getFrameInfo()->CreateFixedObject(8,
+                                                      Offset,
+                                                      true);
+        SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+        SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+                                   MachinePointerInfo(),
+                                   false,false, 0);
+        InVals.push_back(Load);
+        continue;
       }
-      ArgOffset += 8;
-      break;
+
+      int FI = MF.getFrameInfo()->CreateFixedObject(4,
+                                                    Offset,
+                                                    true);
+      SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+      SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+      int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
+                                                     Offset+4,
+                                                     true);
+      SDValue FIPtr2 = DAG.getFrameIndex(FI2, getPointerTy());
+
+      SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+
+      SDValue WholeValue =
+        DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
+      WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue);
+      InVals.push_back(WholeValue);
+      continue;
+    }
+
+    int FI = MF.getFrameInfo()->CreateFixedObject(4,
+                                                  Offset,
+                                                  true);
+    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+    SDValue Load ;
+    if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
+      Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
+                         MachinePointerInfo(),
+                         false, false, 0);
+    } else {
+      ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
+      // Sparc is big endian, so add an offset based on the value type's size.
+      unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8);
+      FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
+                          DAG.getConstant(Offset, MVT::i32));
+      Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+                            MachinePointerInfo(),
+                            VA.getValVT(), false, false,0);
+      Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load);
     }
+    InVals.push_back(Load);
+  }
+
+  if (MF.getFunction()->hasStructRetAttr()) {
+    //Copy the SRet Argument to SRetReturnReg
+    SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
+    unsigned Reg = SFI->getSRetReturnReg();
+    if (!Reg) {
+      Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
+      SFI->setSRetReturnReg(Reg);
+    }
+    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
   }
 
   // Store remaining ArgRegs to the stack if this is a varargs function.
   if (isVarArg) {
+    static const unsigned ArgRegs[] = {
+      SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
+    };
+    unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
+    const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+    unsigned ArgOffset = CCInfo.getNextStackOffset();
+    if (NumAllocated == 6)
+      ArgOffset += StackOffset;
+    else {
+      assert(!ArgOffset);
+      ArgOffset = 68+4*NumAllocated;
+    }
+
     // Remember the vararg offset for the va_start implementation.
     FuncInfo->setVarArgsFrameOffset(ArgOffset);
 
@@ -243,7 +323,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
                                                           true);
       SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
 
-      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
+      OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr,
+                                       MachinePointerInfo(),
                                        false, false, 0));
       ArgOffset += 4;
     }
@@ -270,191 +351,180 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // Sparc target does not yet support tail call optimization.
   isTailCall = false;
 
-#if 0
   // Analyze operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs);
+  CCState CCInfo(CallConv, isVarArg, DAG.getTarget(), ArgLocs,
+                 *DAG.getContext());
   CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
 
   // Get the size of the outgoing arguments stack space requirement.
   unsigned ArgsSize = CCInfo.getNextStackOffset();
-  // FIXME: We can't use this until f64 is known to take two GPRs.
-#else
-  (void)CC_Sparc32;
-
-  // Count the size of the outgoing arguments.
-  unsigned ArgsSize = 0;
-  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    switch (Outs[i].VT.getSimpleVT().SimpleTy) {
-      default: llvm_unreachable("Unknown value type!");
-      case MVT::i1:
-      case MVT::i8:
-      case MVT::i16:
-      case MVT::i32:
-      case MVT::f32:
-        ArgsSize += 4;
-        break;
-      case MVT::i64:
-      case MVT::f64:
-        ArgsSize += 8;
-        break;
-    }
-  }
-  if (ArgsSize > 4*6)
-    ArgsSize -= 4*6;    // Space for first 6 arguments is prereserved.
-  else
-    ArgsSize = 0;
-#endif
 
   // Keep stack frames 8-byte aligned.
   ArgsSize = (ArgsSize+7) & ~7;
 
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+
+  //Create local copies for byval args.
+  SmallVector<SDValue, 8> ByValArgs;
+  for (unsigned i = 0,  e = Outs.size(); i != e; ++i) {
+    ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    if (!Flags.isByVal())
+      continue;
+
+    SDValue Arg = OutVals[i];
+    unsigned Size = Flags.getByValSize();
+    unsigned Align = Flags.getByValAlign();
+
+    int FI = MFI->CreateStackObject(Size, Align, false);
+    SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
+    SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
+
+    Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
+                          false,        //isVolatile,
+                          (Size <= 32), //AlwaysInline if size <= 32
+                          MachinePointerInfo(), MachinePointerInfo());
+    ByValArgs.push_back(FIPtr);
+  }
+
   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
 
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   SmallVector<SDValue, 8> MemOpChains;
 
-#if 0
+  const unsigned StackOffset = 92;
   // Walk the register/memloc assignments, inserting copies/loads.
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+  for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
+       i != e;
+       ++i, ++realArgIdx) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = OutVals[i];
+    SDValue Arg = OutVals[realArgIdx];
+
+    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+
+    //Use local copy if it is a byval arg.
+    if (Flags.isByVal())
+      Arg = ByValArgs[byvalArgIdx++];
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
       break;
     case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
+      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::BCvt:
+      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
 
-    // Arguments that can be passed on register must be kept at
-    // RegsToPass vector
-    if (VA.isRegLoc()) {
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    if (Flags.isSRet()) {
+      assert(VA.needsCustom());
+      // store SRet argument in %sp+64
+      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+      SDValue PtrOff = DAG.getIntPtrConstant(64);
+      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                         MachinePointerInfo(),
+                                         false, false, 0));
       continue;
     }
 
-    assert(VA.isMemLoc());
-
-    // Create a store off the stack pointer for this argument.
-    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
-    // FIXME: VERIFY THAT 68 IS RIGHT.
-    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+68);
-    PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
-    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0,
-                                       false, false, 0));
-  }
-
-#else
-  static const unsigned ArgRegs[] = {
-    SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
-  };
-  unsigned ArgOffset = 68;
-
-  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
-    SDValue Val = OutVals[i];
-    EVT ObjectVT = Outs[i].VT;
-    SDValue ValToStore(0, 0);
-    unsigned ObjSize;
-    switch (ObjectVT.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Unhandled argument type!");
-    case MVT::i32:
-      ObjSize = 4;
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;
-      } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
-      }
-      break;
-    case MVT::f32:
-      ObjSize = 4;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;
-      } else {
-        // Convert this to a FP value in an int reg.
-        Val = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Val);
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Val));
-      }
-      break;
-    case MVT::f64: {
-      ObjSize = 8;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;    // Whole thing is passed in memory.
-        break;
+    if (VA.needsCustom()) {
+      assert(VA.getLocVT() == MVT::f64);
+
+      if (VA.isMemLoc()) {
+        unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        //if it is double-word aligned, just store.
+        if (Offset % 8 == 0) {
+          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+          MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                             MachinePointerInfo(),
+                                             false, false, 0));
+          continue;
+        }
       }
 
-      // Break into top and bottom parts by storing to the stack and loading
-      // out the parts as integers.  Top part goes in a reg.
       SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32);
-      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 
-                                   Val, StackPtr, NULL, 0,
+      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+                                   Arg, StackPtr, MachinePointerInfo(),
                                    false, false, 0);
       // Sparc is big-endian, so the high part comes first.
-      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
+      SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
       // Increment the pointer to the other half.
       StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
                              DAG.getIntPtrConstant(4));
       // Load the low part.
-      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, NULL, 0,
-                               false, false, 0);
-
-      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Lo;
-        ArgOffset += 4;
-        ObjSize = 4;
+      SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+      if (VA.isRegLoc()) {
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
+        assert(i+1 != e);
+        CCValAssign &NextVA = ArgLocs[++i];
+        if (NextVA.isRegLoc()) {
+          RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo));
+        } else {
+          //Store the low part in stack.
+          unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
+          SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+          SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+          PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+          MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+                                             MachinePointerInfo(),
+                                             false, false, 0));
+        }
       } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+        unsigned Offset = VA.getLocMemOffset() + StackOffset;
+        // Store the high part.
+        SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+        SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff,
+                                           MachinePointerInfo(),
+                                           false, false, 0));
+        // Store the low part.
+        PtrOff = DAG.getIntPtrConstant(Offset+4);
+        PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+        MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff,
+                                           MachinePointerInfo(),
+                                           false, false, 0));
       }
-      break;
+      continue;
     }
-    case MVT::i64: {
-      ObjSize = 8;
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Val;    // Whole thing is passed in memory.
-        break;
-      }
 
-      // Split the value into top and bottom part.  Top part goes in a reg.
-      SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
-                                 DAG.getConstant(1, MVT::i32));
-      SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Val,
-                                 DAG.getConstant(0, MVT::i32));
-      RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Hi));
-
-      if (RegsToPass.size() >= 6) {
-        ValToStore = Lo;
-        ArgOffset += 4;
-        ObjSize = 4;
-      } else {
-        RegsToPass.push_back(std::make_pair(ArgRegs[RegsToPass.size()], Lo));
+    // Arguments that can be passed on register must be kept at
+    // RegsToPass vector
+    if (VA.isRegLoc()) {
+      if (VA.getLocVT() != MVT::f32) {
+        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+        continue;
       }
-      break;
-    }
+      Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
     }
 
-    if (ValToStore.getNode()) {
-      SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
-      SDValue PtrOff = DAG.getConstant(ArgOffset, MVT::i32);
-      PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
-      MemOpChains.push_back(DAG.getStore(Chain, dl, ValToStore, 
-                                         PtrOff, NULL, 0,
-                                         false, false, 0));
-    }
-    ArgOffset += ObjSize;
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
+    SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset()+StackOffset);
+    PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+                                       MachinePointerInfo(),
+                                       false, false, 0));
   }
-#endif
+
 
   // Emit all stores, make sure the occur before any copies into physregs.
   if (!MemOpChains.empty())
@@ -484,11 +554,22 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
 
-  std::vector<EVT> NodeTys;
-  NodeTys.push_back(MVT::Other);   // Returns a chain
-  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
-  SDValue Ops[] = { Chain, Callee, InFlag };
-  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops, InFlag.getNode() ? 3 : 2);
+  // Returns a chain & a flag for retval copy to use
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    unsigned Reg = RegsToPass[i].first;
+    if (Reg >= SP::I0 && Reg <= SP::I7)
+      Reg = Reg-SP::I0+SP::O0;
+
+    Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
+  }
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
+
+  Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
   InFlag = Chain.getValue(1);
 
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
@@ -610,8 +691,8 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 
-  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
-  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 
   // Sparc has no select or setcc: expand to SELECT_CC.
   setOperationAction(ISD::SELECT, MVT::i32, Expand);
@@ -701,6 +782,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case SPISD::ITOF:       return "SPISD::ITOF";
   case SPISD::CALL:       return "SPISD::CALL";
   case SPISD::RET_FLAG:   return "SPISD::RET_FLAG";
+  case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
+  case SPISD::FLUSHW:     return "SPISD::FLUSHW";
   }
 }
 
@@ -756,7 +839,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
   }
 }
 
-SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, 
+SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   // FIXME there isn't really any debug info here
@@ -765,16 +848,16 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
   SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
   SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
 
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
     return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  
+
   SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
                                    getPointerTy());
   SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
-  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, 
+  SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -786,16 +869,16 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
   SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment());
   SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP);
   SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP);
-  if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 
+  if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
     return DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
 
-  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl, 
+  SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, dl,
                                    getPointerTy());
   SDValue RelAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Lo, Hi);
   SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                 GlobalBase, RelAddr);
-  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 
-                     AbsAddr, NULL, 0, false, false, 0);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     AbsAddr, MachinePointerInfo(), false, false, 0);
 }
 
 static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -803,13 +886,13 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
   // Convert the fp value to integer in an FP register.
   assert(Op.getValueType() == MVT::i32);
   Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+  return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
 }
 
 static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
   assert(Op.getOperand(0).getValueType() == MVT::i32);
-  SDValue Tmp = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0));
+  SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   // Convert the int value to FP in an FP register.
   return DAG.getNode(SPISD::ITOF, dl, Op.getValueType(), Tmp);
 }
@@ -832,13 +915,13 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
   if (LHS.getValueType() == MVT::i32) {
     std::vector<EVT> VTs;
     VTs.push_back(MVT::i32);
-    VTs.push_back(MVT::Flag);
+    VTs.push_back(MVT::Glue);
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
     Opc = SPISD::BRICC;
   } else {
-    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
     Opc = SPISD::BRFCC;
   }
@@ -863,13 +946,13 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   if (LHS.getValueType() == MVT::i32) {
     std::vector<EVT> VTs;
     VTs.push_back(LHS.getValueType());   // subcc returns a value
-    VTs.push_back(MVT::Flag);
+    VTs.push_back(MVT::Glue);
     SDValue Ops[2] = { LHS, RHS };
     CompareFlag = DAG.getNode(SPISD::CMPICC, dl, VTs, Ops, 2).getValue(1);
     Opc = SPISD::SELECT_ICC;
     if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
   } else {
-    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Flag, LHS, RHS);
+    CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
     Opc = SPISD::SELECT_FCC;
     if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
   }
@@ -891,8 +974,8 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
                 DAG.getConstant(FuncInfo->getVarArgsFrameOffset(),
                                 MVT::i32));
   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0,
-                      false, false, 0);
+  return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
 }
 
 static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
@@ -902,27 +985,28 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
   SDValue VAListPtr = Node->getOperand(1);
   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
   DebugLoc dl = Node->getDebugLoc();
-  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, SV, 0,
-                               false, false, 0);
+  SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
+                               MachinePointerInfo(SV), false, false, 0);
   // Increment the pointer, VAList, to the next vaarg
   SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
                                   DAG.getConstant(VT.getSizeInBits()/8,
                                                   MVT::i32));
   // Store the incremented VAList to the legalized pointer
   InChain = DAG.getStore(VAList.getValue(1), dl, NextPtr,
-                         VAListPtr, SV, 0, false, false, 0);
+                         VAListPtr, MachinePointerInfo(SV), false, false, 0);
   // Load the actual argument out of the pointer VAList, unless this is an
   // f64 load.
   if (VT != MVT::f64)
-    return DAG.getLoad(VT, dl, InChain, VAList, NULL, 0, false, false, 0);
+    return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
+                       false, false, 0);
 
   // Otherwise, load it as i64, then do a bitconvert.
-  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, NULL, 0,
+  SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
                           false, false, 0);
 
   // Bit-Convert the value to f64.
   SDValue Ops[2] = {
-    DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, V),
+    DAG.getNode(ISD::BITCAST, dl, MVT::f64, V),
     V.getValue(1)
   };
   return DAG.getMergeValues(Ops, 2, dl);
@@ -947,13 +1031,82 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) {
 }
 
 
+// Build an SPISD::FLUSHW node on the entry chain and hand back its chain.
+static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
+  SDValue Entry = DAG.getEntryNode();
+  // The node's only result has type MVT::Other, i.e. it is itself a chain.
+  return DAG.getNode(SPISD::FLUSHW, Op.getDebugLoc(), MVT::Other, Entry);
+}
+
+static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+  // Lowers ISD::FRAMEADDR; operand 0 is the constant depth (0 reads %i6).
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned FrameReg = SP::I6;
+
+  uint64_t depth = Op.getConstantOperandVal(0);
+  // NOTE(review): offset 56 looks like the saved %i6 slot of a frame; confirm.
+  SDValue FrameAddr;
+  if (depth == 0)
+    FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
+  else {
+    // Flush first so that the windowed registers' values are on the stack.
+    SDValue Chain = getFLUSHW(Op, DAG);
+    FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
+    // Starting from %i6, load the word at FrameAddr+56 once per level.
+    for (uint64_t i = 0; i != depth; ++i) {
+      SDValue Ptr = DAG.getNode(ISD::ADD,
+                                dl, MVT::i32,
+                                FrameAddr, DAG.getIntPtrConstant(56));
+      FrameAddr = DAG.getLoad(MVT::i32, dl,
+                              Chain,
+                              Ptr,
+                              MachinePointerInfo(), false, false, 0);
+    }
+  }
+  return FrameAddr;
+}
+
+static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setReturnAddressIsTaken(true);
+  // Lowers ISD::RETURNADDR; operand 0 is the constant depth (0 reads %i7).
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned RetReg = SP::I7;
+
+  uint64_t depth = Op.getConstantOperandVal(0);
+  // NOTE(review): 56/60 look like a frame's saved %i6/%i7 slots; confirm.
+  SDValue RetAddr;
+  if (depth == 0)
+    RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
+  else {
+    // Flush first so that the windowed registers' values are on the stack.
+    SDValue Chain = getFLUSHW(Op, DAG);
+    RetAddr = DAG.getCopyFromReg(Chain, dl, SP::I6, VT);
+    // From %i6, load at +56 for every level except the last, which uses +60.
+    for (uint64_t i = 0; i != depth; ++i) {
+      SDValue Ptr = DAG.getNode(ISD::ADD,
+                                dl, MVT::i32,
+                                RetAddr,
+                                DAG.getIntPtrConstant((i == depth-1)?60:56));
+      RetAddr = DAG.getLoad(MVT::i32, dl,
+                            Chain,
+                            Ptr,
+                            MachinePointerInfo(), false, false, 0);
+    }
+  }
+  return RetAddr;
+}
+
 SDValue SparcTargetLowering::
 LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Should not custom lower this!");
-  // Frame & Return address.  Currently unimplemented
-  case ISD::RETURNADDR: return SDValue();
-  case ISD::FRAMEADDR:  return SDValue();
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
   case ISD::GlobalTLSAddress:
     llvm_unreachable("TLS not implemented for Sparc.");
   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
@@ -1009,6 +1162,8 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, copy0MBB);
+  F->insert(It, sinkMBB);
 
   // Transfer the remainder of BB and its successor edges to sinkMBB.
   sinkMBB->splice(sinkMBB->begin(), BB,
@@ -1021,8 +1176,6 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   BB->addSuccessor(sinkMBB);
 
   BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
 
   //  copy0MBB:
   //   %FalseValue = ...
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
index db39e08..849e401 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -36,7 +36,8 @@ namespace llvm {
 
       CALL,        // A call instruction.
       RET_FLAG,    // Return with a flag operand.
-      GLOBAL_BASE_REG // Global base reg for PIC
+      GLOBAL_BASE_REG, // Global base reg for PIC
+      FLUSHW       // FLUSH register windows to stack
     };
   }
 
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 7ede8e7..afa3c1f 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -66,15 +66,200 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
   return 0;
 }
 
+// Treats every code numerically at or below SPCC::ICC_VC as an integer one.
+static bool IsIntegerCC(unsigned CC) {
+  return !(CC > SPCC::ICC_VC);
+}
+
+
+// Returns the condition that holds exactly when CC does not hold.
+static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC) {
+  switch(CC) {
+  default: llvm_unreachable("Unknown condition code");
+  case SPCC::ICC_NE:   return SPCC::ICC_E;
+  case SPCC::ICC_E:    return SPCC::ICC_NE;
+  case SPCC::ICC_G:    return SPCC::ICC_LE;
+  case SPCC::ICC_LE:   return SPCC::ICC_G;
+  case SPCC::ICC_GE:   return SPCC::ICC_L;
+  case SPCC::ICC_L:    return SPCC::ICC_GE;
+  case SPCC::ICC_GU:   return SPCC::ICC_LEU;
+  case SPCC::ICC_LEU:  return SPCC::ICC_GU;
+  case SPCC::ICC_CC:   return SPCC::ICC_CS;
+  case SPCC::ICC_CS:   return SPCC::ICC_CC;
+  case SPCC::ICC_POS:  return SPCC::ICC_NEG;
+  case SPCC::ICC_NEG:  return SPCC::ICC_POS;
+  case SPCC::ICC_VC:   return SPCC::ICC_VS;
+  case SPCC::ICC_VS:   return SPCC::ICC_VC;
+  // FP compares may be unordered, so inverting toggles the "U" part as well.
+  case SPCC::FCC_U:    return SPCC::FCC_O;
+  case SPCC::FCC_O:    return SPCC::FCC_U;
+  case SPCC::FCC_G:    return SPCC::FCC_ULE;  // !(>)  is  unordered or <=
+  case SPCC::FCC_LE:   return SPCC::FCC_UG;   // !(<=) is  unordered or >
+  case SPCC::FCC_UG:   return SPCC::FCC_LE;
+  case SPCC::FCC_ULE:  return SPCC::FCC_G;
+  case SPCC::FCC_L:    return SPCC::FCC_UGE;  // !(<)  is  unordered or >=
+  case SPCC::FCC_GE:   return SPCC::FCC_UL;   // !(>=) is  unordered or <
+  case SPCC::FCC_UL:   return SPCC::FCC_GE;
+  case SPCC::FCC_UGE:  return SPCC::FCC_L;
+  case SPCC::FCC_LG:   return SPCC::FCC_UE;
+  case SPCC::FCC_UE:   return SPCC::FCC_LG;
+  case SPCC::FCC_NE:   return SPCC::FCC_E;
+  case SPCC::FCC_E:    return SPCC::FCC_NE;
+  }
+}
+
+
+// Scan MBB's terminators backwards, filling TBB/FBB/Cond. Returns false if
+// the block ending was understood and true if it cannot be analyzed.
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   SmallVectorImpl<MachineOperand> &Cond,
+                                   bool AllowModify) const {
+  MachineBasicBlock::iterator I = MBB.end();
+  MachineBasicBlock::iterator UnCondBrIter = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+
+    if (I->isDebugValue())
+      continue;
+
+    // The first non-terminator ends the scan.
+    if (!isUnpredicatedTerminator(I))
+      break;
+
+    // A terminator that is not a branch cannot be analyzed.
+    if (!I->getDesc().isBranch())
+      return true;
+
+    // Unconditional branch.
+    if (I->getOpcode() == SP::BA) {
+      UnCondBrIter = I;
+
+      if (!AllowModify) {
+        TBB = I->getOperand(0).getMBB();
+        continue;
+      }
+      // Everything after an unconditional branch is dead; delete it.
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
+      Cond.clear();
+      FBB = 0;
+      // A branch to the layout successor is redundant; delete it as well.
+      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
+        TBB = 0;
+        I->eraseFromParent();
+        I = MBB.end();
+        UnCondBrIter = MBB.end();
+        continue;
+      }
+
+      TBB = I->getOperand(0).getMBB();
+      continue;
+    }
+
+    unsigned Opcode = I->getOpcode();
+    if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
+      return true; // Unknown branch opcode
+
+    SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
+
+    if (Cond.empty()) {
+      MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
+      if (AllowModify && UnCondBrIter != MBB.end() &&
+          MBB.isLayoutSuccessor(TargetBB)) {
+
+        // Transform the code
+        //
+        //    brCC L1
+        //    ba L2
+        // L1:
+        //    ..
+        // L2:
+        //
+        // into
+        //
+        //   brnCC L2
+        // L1:
+        //   ...
+        // L2:
+        // (a temporary "ba L1" is emitted here and removed by the rescan below)
+        BranchCode = GetOppositeBranchCondition(BranchCode);
+        MachineBasicBlock::iterator OldInst = I;
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
+          .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
+        BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
+          .addMBB(TargetBB);
+        // TargetBB is already a successor of MBB, so the CFG edges stay as is.
+        OldInst->eraseFromParent();
+        UnCondBrIter->eraseFromParent();
+
+        UnCondBrIter = MBB.end();
+        I = MBB.end();
+        continue;
+      }
+      FBB = TBB;
+      TBB = I->getOperand(0).getMBB();
+      Cond.push_back(MachineOperand::CreateImm(BranchCode));
+      continue;
+    }
+    // FIXME: Handle subsequent conditional branches.
+    // For now, a second conditional branch makes the block unanalyzable.
+    return true;
+  }
+  return false;
+}
+
 unsigned
 SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                              MachineBasicBlock *FBB,
                              const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL)const{
-  // Can only insert uncond branches so far.
-  assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
-  return 1;
+                             DebugLoc DL) const {
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  assert((Cond.size() == 1 || Cond.size() == 0) &&
+         "Sparc branch conditions should have one component!");
+  // Returns the number of branch instructions appended to MBB (1 or 2).
+  if (Cond.empty()) {
+    assert(!FBB && "Unconditional branch with multiple successors!");
+    BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+    return 1;
+  }
+
+  // Conditional branch: Cond[0] carries the condition code as an immediate.
+  unsigned CC = Cond[0].getImm();
+  // Integer codes are emitted as BCOND, every other code as FBCOND.
+  if (IsIntegerCC(CC))
+    BuildMI(&MBB, DL, get(SP::BCOND)).addMBB(TBB).addImm(CC);
+  else
+    BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+  if (!FBB)
+    return 1;
+  // Two-way branch: follow the conditional branch with a BA to FBB.
+  BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+  return 2;
+}
+
+unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
+{ // Erases the trailing BA/BCOND/FBCOND instructions of MBB.
+  MachineBasicBlock::iterator I = MBB.end();
+  unsigned Count = 0; // branches erased so far; this is the return value
+  while (I != MBB.begin()) {
+    --I;
+    // Debug values are stepped over and never counted.
+    if (I->isDebugValue())
+      continue;
+    // Stop at the first instruction that is not BA, BCOND or FBCOND.
+    if (I->getOpcode() != SP::BA
+        && I->getOpcode() != SP::BCOND
+        && I->getOpcode() != SP::FBCOND)
+      break; // Not a branch
+    // Erase the branch and restart the scan from the end of the block.
+    I->eraseFromParent();
+    I = MBB.end();
+    ++Count;
+  }
+  return Count;
 }
 
 void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index c00bd21..b2d24f5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -58,8 +58,15 @@ public:
   /// any side effects other than storing to the stack slot.
   virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
                                       int &FrameIndex) const;
-  
-  
+
+
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify = false) const ;
+
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                                 const SmallVectorImpl<MachineOperand> &Cond,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 467ed48..1072323 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -95,10 +95,10 @@ SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisFP<1>]>;
 def SDTSPITOF :
 SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
 
-def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutFlag]>;
-def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutFlag]>;
-def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
-def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInFlag]>;
+def SPcmpicc : SDNode<"SPISD::CMPICC", SDTIntBinOp, [SDNPOutGlue]>;
+def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>;
+def SPbricc : SDNode<"SPISD::BRICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
 
 def SPhi    : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
 def SPlo    : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
@@ -106,8 +106,8 @@ def SPlo    : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
 def SPftoi  : SDNode<"SPISD::FTOI", SDTSPFTOI>;
 def SPitof  : SDNode<"SPISD::ITOF", SDTSPITOF>;
 
-def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInFlag]>;
-def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInFlag]>;
+def SPselecticc : SDNode<"SPISD::SELECT_ICC", SDTSPselectcc, [SDNPInGlue]>;
+def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>;
 
 //  These are target-independent nodes, but have target-specific formats.
 def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
@@ -115,16 +115,20 @@ def SDT_SPCallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                         SDTCisVT<1, i32> ]>;
 
 def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
-                           [SDNPHasChain, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOutGlue]>;
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPCallSeqEnd,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
-def SDT_SPCall    : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def SDT_SPCall    : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
 def call          : SDNode<"SPISD::CALL", SDT_SPCall,
-                           [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
 
 def retflag       : SDNode<"SPISD::RET_FLAG", SDTNone,
-                           [SDNPHasChain, SDNPOptInFlag]>;
+                           [SDNPHasChain, SDNPOptInGlue]>;
+
+def flushw        : SDNode<"SPISD::FLUSHW", SDTNone,
+                           [SDNPHasChain]>;
 
 def getPCX        : Operand<i32> {
   let PrintMethod = "printGetPCX";
@@ -204,7 +208,7 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
    : InstSP<outs, ins, asmstr, pattern>;
 
 // GETPCX for PIC
-let Defs = [O7], Uses = [O7] in {
+let Defs = [O7] in {
   def GETPCX : Pseudo<(outs getPCX:$getpcseq), (ins), "$getpcseq", [] >;
 }
 
@@ -217,6 +221,17 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
                             [(callseq_end timm:$amt1, timm:$amt2)]>;
 }
 
+let hasSideEffects = 1, mayStore = 1 in {
+  let rd = 0, rs1 = 0, rs2 = 0 in
+    def FLUSHW : F3_1<0b10, 0b101011, (outs), (ins),
+                      "flushw",
+                      [(flushw)]>, Requires<[HasV9]>;
+  let rd = 0, rs1 = 1, simm13 = 3 in
+    def TA3 : F3_2<0b10, 0b111010, (outs), (ins),
+                   "ta 3",
+                   [(flushw)]>;
+}
+
 // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the 
 // fpmover pass.
 let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
@@ -233,32 +248,39 @@ let Predicates = [HasNoV9] in {  // Only emit these in V8 mode.
 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation.  Expanded after
 // instruction selection into a branch sequence.  This has to handle all
 // permutations of selection between i32/f32/f64 on ICC and FCC.
-let usesCustomInserter = 1 in {   // Expanded after instruction selection.
+// ICC-based selects; expanded after instruction selection.
+let Uses = [ICC], usesCustomInserter = 1 in { 
   def SELECT_CC_Int_ICC
    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
             "; SELECT_CC_Int_ICC PSEUDO!",
             [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F,
                                              imm:$Cond))]>;
-  def SELECT_CC_Int_FCC
-   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
-            "; SELECT_CC_Int_FCC PSEUDO!",
-            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
-                                             imm:$Cond))]>;
   def SELECT_CC_FP_ICC
    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_FP_ICC PSEUDO!",
             [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F,
                                             imm:$Cond))]>;
-  def SELECT_CC_FP_FCC
-   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
-            "; SELECT_CC_FP_FCC PSEUDO!",
-            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
-                                            imm:$Cond))]>;
+
   def SELECT_CC_DFP_ICC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_ICC PSEUDO!",
             [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F,
                                              imm:$Cond))]>;
+}
+
+let usesCustomInserter = 1, Uses = [FCC] in {
+
+  def SELECT_CC_Int_FCC
+   : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_Int_FCC PSEUDO!",
+            [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F,
+                                             imm:$Cond))]>;
+
+  def SELECT_CC_FP_FCC
+   : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond),
+            "; SELECT_CC_FP_FCC PSEUDO!",
+            [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F,
+                                            imm:$Cond))]>;
   def SELECT_CC_DFP_FCC
    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond),
             "; SELECT_CC_DFP_FCC PSEUDO!",
@@ -272,6 +294,9 @@ let usesCustomInserter = 1 in {   // Expanded after instruction selection.
 let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
   let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
     def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+
+  let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in
+    def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>;
 }
 
 // Section B.1 - Load Integer Instructions, p. 90
@@ -436,28 +461,34 @@ def LEA_ADDri   : F3_2<2, 0b000000,
 let Defs = [ICC] in                   
   defm ADDCC  : F3_12<"addcc", 0b010000, addc>;
 
-defm ADDX  : F3_12<"addx", 0b001000, adde>;
+let Uses = [ICC] in
+  defm ADDX  : F3_12<"addx", 0b001000, adde>;
 
 // Section B.15 - Subtract Instructions, p. 110
 defm SUB    : F3_12  <"sub"  , 0b000100, sub>;
-defm SUBX   : F3_12  <"subx" , 0b001100, sube>;
+let Uses = [ICC] in 
+  defm SUBX   : F3_12  <"subx" , 0b001100, sube>;
 
-let Defs = [ICC] in {
+let Defs = [ICC] in 
   defm SUBCC  : F3_12  <"subcc", 0b010100, SPcmpicc>;
 
+let Uses = [ICC], Defs = [ICC] in
   def SUBXCCrr: F3_1<2, 0b011100, 
                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
                 "subxcc $b, $c, $dst", []>;
-}
 
-// Section B.18 - Multiply Instructions, p. 113
-defm UMUL : F3_12np<"umul", 0b001010>;
-defm SMUL : F3_12  <"smul", 0b001011, mul>;
 
+// Section B.18 - Multiply Instructions, p. 113
+let Defs = [Y] in {
+  defm UMUL : F3_12np<"umul", 0b001010>;
+  defm SMUL : F3_12  <"smul", 0b001011, mul>;
+}
 
 // Section B.19 - Divide Instructions, p. 115
-defm UDIV : F3_12np<"udiv", 0b001110>;
-defm SDIV : F3_12np<"sdiv", 0b001111>;
+let Defs = [Y] in {
+  defm UDIV : F3_12np<"udiv", 0b001110>;
+  defm SDIV : F3_12np<"sdiv", 0b001111>;
+}
 
 // Section B.20 - SAVE and RESTORE, p. 117
 defm SAVE    : F3_12np<"save"   , 0b111100>;
@@ -504,11 +535,12 @@ let Uses = [FCC] in
 
 // Section B.24 - Call and Link Instruction, p. 125
 // This is the only Format 1 instruction
-let Uses = [O0, O1, O2, O3, O4, O5],
+let Uses = [O6],
     hasDelaySlot = 1, isCall = 1,
     Defs = [O0, O1, O2, O3, O4, O5, O7, G1, G2, G3, G4, G5, G6, G7,
-    D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15] in { 
-  def CALL : InstSP<(outs), (ins calltarget:$dst),
+    D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+        ICC, FCC, Y] in {
+  def CALL : InstSP<(outs), (ins calltarget:$dst, variable_ops),
                     "call $dst", []> {
     bits<30> disp;
     let op = 1;
@@ -517,28 +549,30 @@ let Uses = [O0, O1, O2, O3, O4, O5],
   
   // indirect calls
   def JMPLrr : F3_1<2, 0b111000,
-                    (outs), (ins MEMrr:$ptr),
+                    (outs), (ins MEMrr:$ptr, variable_ops),
                     "call $ptr",
                     [(call ADDRrr:$ptr)]>;
   def JMPLri : F3_2<2, 0b111000,
-                    (outs), (ins MEMri:$ptr),
+                    (outs), (ins MEMri:$ptr, variable_ops),
                     "call $ptr",
                     [(call ADDRri:$ptr)]>;
 }
 
 // Section B.28 - Read State Register Instructions
-def RDY : F3_1<2, 0b101000,
-               (outs IntRegs:$dst), (ins),
-               "rd %y, $dst", []>;
+let Uses = [Y] in 
+  def RDY : F3_1<2, 0b101000,
+                 (outs IntRegs:$dst), (ins),
+                 "rd %y, $dst", []>;
 
 // Section B.29 - Write State Register Instructions
-def WRYrr : F3_1<2, 0b110000,
-                 (outs), (ins IntRegs:$b, IntRegs:$c),
-                 "wr $b, $c, %y", []>;
-def WRYri : F3_2<2, 0b110000,
-                 (outs), (ins IntRegs:$b, i32imm:$c),
-                 "wr $b, $c, %y", []>;
-
+let Defs = [Y] in {
+  def WRYrr : F3_1<2, 0b110000,
+                   (outs), (ins IntRegs:$b, IntRegs:$c),
+                   "wr $b, $c, %y", []>;
+  def WRYri : F3_2<2, 0b110000,
+                   (outs), (ins IntRegs:$b, i32imm:$c),
+                   "wr $b, $c, %y", []>;
+}
 // Convert Integer to Floating-point Instructions, p. 141
 def FITOS : F3_3<2, 0b110100, 0b011000100,
                  (outs FPRegs:$dst), (ins FPRegs:$src),
@@ -660,48 +694,57 @@ let Defs = [FCC] in {
 let Predicates = [HasV9], Constraints = "$T = $dst" in {
   // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
   // FIXME: Add instruction encodings for the JIT some day.
-  def MOVICCrr
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-             "mov$cc %icc, $F, $dst",
-             [(set IntRegs:$dst,
-                         (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
-  def MOVICCri
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-             "mov$cc %icc, $F, $dst",
-             [(set IntRegs:$dst,
-                          (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
-  def MOVFCCrr
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
-             "mov$cc %fcc0, $F, $dst",
-             [(set IntRegs:$dst,
-                         (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
-  def MOVFCCri
-    : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
-             "mov$cc %fcc0, $F, $dst",
-             [(set IntRegs:$dst,
-                          (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
-
-  def FMOVS_ICC
-    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-             "fmovs$cc %icc, $F, $dst",
-             [(set FPRegs:$dst,
-                         (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
-  def FMOVD_ICC
-    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-             "fmovd$cc %icc, $F, $dst",
-             [(set DFPRegs:$dst,
-                         (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
-  def FMOVS_FCC
-    : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
-             "fmovs$cc %fcc0, $F, $dst",
-             [(set FPRegs:$dst,
-                         (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
-  def FMOVD_FCC
-    : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
-             "fmovd$cc %fcc0, $F, $dst",
-             [(set DFPRegs:$dst,
-                         (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  let Uses = [ICC] in {
+    def MOVICCrr
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+               "mov$cc %icc, $F, $dst",
+               [(set IntRegs:$dst,
+                           (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+    def MOVICCri
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+               "mov$cc %icc, $F, $dst",
+               [(set IntRegs:$dst,
+                            (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [FCC] in {
+    def MOVFCCrr
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc),
+               "mov$cc %fcc0, $F, $dst",
+               [(set IntRegs:$dst,
+                           (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>;
+    def MOVFCCri
+      : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc),
+               "mov$cc %fcc0, $F, $dst",
+               [(set IntRegs:$dst,
+                            (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [ICC] in {
+    def FMOVS_ICC
+      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+               "fmovs$cc %icc, $F, $dst",
+               [(set FPRegs:$dst,
+                           (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+    def FMOVD_ICC
+      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+               "fmovd$cc %icc, $F, $dst",
+               [(set DFPRegs:$dst,
+                           (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  }
+
+  let Uses = [FCC] in {
+    def FMOVS_FCC
+      : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc),
+               "fmovs$cc %fcc0, $F, $dst",
+               [(set FPRegs:$dst,
+                           (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>;
+    def FMOVD_FCC
+      : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc),
+               "fmovd$cc %fcc0, $F, $dst",
+               [(set DFPRegs:$dst,
+                           (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>;
+  }
 
 }
 
diff --git a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
index e34c131..0b74308 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -24,16 +24,23 @@ namespace llvm {
     /// VarArgsFrameOffset - Frame offset to start of varargs area.
     int VarArgsFrameOffset;
 
+    /// SRetReturnReg - Holds the virtual register into which the sret
+    /// argument is passed.
+    unsigned SRetReturnReg;
   public:
-    SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+    SparcMachineFunctionInfo()
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
     explicit SparcMachineFunctionInfo(MachineFunction &MF)
-      : GlobalBaseReg(0), VarArgsFrameOffset(0) {}
+      : GlobalBaseReg(0), VarArgsFrameOffset(0), SRetReturnReg(0) {}
 
     unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
     void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
 
     int getVarArgsFrameOffset() const { return VarArgsFrameOffset; }
     void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; }
+
+    unsigned getSRetReturnReg() const { return SRetReturnReg; }
+    void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
   };
 }
 
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
index c85db20..b010d04 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -52,10 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
-  return false;
-}
-
 void SparcRegisterInfo::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -112,55 +108,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 void SparcRegisterInfo::
 processFunctionBeforeFrameFinalized(MachineFunction &MF) const {}
 
-void SparcRegisterInfo::emitPrologue(MachineFunction &MF) const {
-  MachineBasicBlock &MBB = MF.front();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MachineBasicBlock::iterator MBBI = MBB.begin();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
-  // Get the number of bytes to allocate from the FrameInfo
-  int NumBytes = (int) MFI->getStackSize();
-
-  // Emit the correct save instruction based on the number of bytes in
-  // the frame. Minimum stack frame size according to V8 ABI is:
-  //   16 words for register window spill
-  //    1 word for address of returned aggregate-value
-  // +  6 words for passing parameters on the stack
-  // ----------
-  //   23 words * 4 bytes per word = 92 bytes
-  NumBytes += 92;
-
-  // Round up to next doubleword boundary -- a double-word boundary
-  // is required by the ABI.
-  NumBytes = (NumBytes + 7) & ~7;
-  NumBytes = -NumBytes;
-  
-  if (NumBytes >= -4096) {
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVEri), SP::O6)
-      .addReg(SP::O6).addImm(NumBytes);
-  } else {
-    // Emit this the hard way.  This clobbers G1 which we always know is 
-    // available here.
-    unsigned OffHi = (unsigned)NumBytes >> 10U;
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SETHIi), SP::G1).addImm(OffHi);
-    // Emit G1 = G1 + I6
-    BuildMI(MBB, MBBI, dl, TII.get(SP::ORri), SP::G1)
-      .addReg(SP::G1).addImm(NumBytes & ((1 << 10)-1));
-    BuildMI(MBB, MBBI, dl, TII.get(SP::SAVErr), SP::O6)
-      .addReg(SP::O6).addReg(SP::G1);
-  }
-}
-
-void SparcRegisterInfo::emitEpilogue(MachineFunction &MF,
-                                     MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = prior(MBB.end());
-  DebugLoc dl = MBBI->getDebugLoc();
-  assert(MBBI->getOpcode() == SP::RETL &&
-         "Can only put epilog before 'retl' instruction!");
-  BuildMI(MBB, MBBI, dl, TII.get(SP::RESTORErr), SP::G0).addReg(SP::G0)
-    .addReg(SP::G0);
-}
-
 unsigned SparcRegisterInfo::getRARegister() const {
   return SP::I7;
 }
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
index 020ce56..d930b53 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h
@@ -26,16 +26,14 @@ class Type;
 struct SparcRegisterInfo : public SparcGenRegisterInfo {
   SparcSubtarget &Subtarget;
   const TargetInstrInfo &TII;
-  
+
   SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
 
-  /// Code Generation virtual methods...  
+  /// Code Generation virtual methods...
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  bool hasFP(const MachineFunction &MF) const;
-
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I) const;
@@ -45,9 +43,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-  
   // Debug information queries.
   unsigned getRARegister() const;
   unsigned getFrameRegister(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index fede929..5ef4dae 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -45,6 +45,9 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
 def ICC : SparcCtrlReg<"ICC">;
 def FCC : SparcCtrlReg<"FCC">;
 
+// Y register
+def Y : SparcCtrlReg<"Y">;
+
 // Integer registers
 def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
 def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index b58d6ba..b84eab5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,9 +10,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Sparc.h"
 #include "SparcMCAsmInfo.h"
 #include "SparcTargetMachine.h"
-#include "Sparc.h"
 #include "llvm/PassManager.h"
 #include "llvm/Target/TargetRegistry.h"
 using namespace llvm;
@@ -34,8 +34,8 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
   : LLVMTargetMachine(T, TT),
     Subtarget(TT, FS, is64bit),
     DataLayout(Subtarget.getDataLayout()),
-     TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+    TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
+    FrameLowering(Subtarget) {
 }
 
 bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
index 322c82a..c4bb6bd 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,13 +14,14 @@
 #ifndef SPARCTARGETMACHINE_H
 #define SPARCTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
 #include "SparcInstrInfo.h"
-#include "SparcSubtarget.h"
 #include "SparcISelLowering.h"
+#include "SparcFrameLowering.h"
 #include "SparcSelectionDAGInfo.h"
+#include "SparcSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
 
@@ -30,13 +31,15 @@ class SparcTargetMachine : public LLVMTargetMachine {
   SparcTargetLowering TLInfo;
   SparcSelectionDAGInfo TSInfo;
   SparcInstrInfo InstrInfo;
-  TargetFrameInfo FrameInfo;
+  SparcFrameLowering FrameLowering;
 public:
   SparcTargetMachine(const Target &T, const std::string &TT,
                      const std::string &FS, bool is64bit);
 
   virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  virtual const TargetFrameInfo  *getFrameInfo() const { return &FrameInfo; }
+  virtual const TargetFrameLowering  *getFrameLowering() const {
+    return &FrameLowering;
+  }
   virtual const SparcSubtarget   *getSubtargetImpl() const{ return &Subtarget; }
   virtual const SparcRegisterInfo *getRegisterInfo() const {
     return &InstrInfo.getRegisterInfo();