Diffstat (limited to 'contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff')
-rw-r--r-- | contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff | 344 |
1 file changed, 344 insertions, 0 deletions
diff --git a/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff new file mode 100644 index 0000000..b1dec41 --- /dev/null +++ b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff @@ -0,0 +1,344 @@ +Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen): + + Implement atomicrmw operations in 32 and 64 bits for SPARCv9. + + These all use the compare-and-swap CASA/CASXA instructions. + +Introduced here: http://svn.freebsd.org/changeset/base/262261 + +Index: test/CodeGen/SPARC/atomics.ll +=================================================================== +--- test/CodeGen/SPARC/atomics.ll ++++ test/CodeGen/SPARC/atomics.ll +@@ -1,4 +1,4 @@ +-; RUN: llc < %s -march=sparcv9 | FileCheck %s ++; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s + + ; CHECK-LABEL: test_atomic_i32 + ; CHECK: ld [%o0] +@@ -61,3 +61,84 @@ entry: + %b = atomicrmw xchg i32* %ptr, i32 42 monotonic + ret i32 %b + } ++ ++; CHECK-LABEL: test_load_add_32 ++; CHECK: membar ++; CHECK: add ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw add i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_sub_64 ++; CHECK: membar ++; CHECK: sub ++; CHECK: casx [%o0] ++; CHECK: membar ++define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) { ++entry: ++ %0 = atomicrmw sub i64* %p, i64 %v seq_cst ++ ret i64 %0 ++} ++ ++; CHECK-LABEL: test_load_xor_32 ++; CHECK: membar ++; CHECK: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw xor i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_and_32 ++; CHECK: membar ++; CHECK: and ++; CHECK-NOT: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw and i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_nand_32 ++; CHECK: membar ++; CHECK: and ++; CHECK: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw nand i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_max_64 ++; CHECK: membar ++; CHECK: cmp ++; CHECK: movg %xcc ++; CHECK: casx [%o0] ++; CHECK: membar ++define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) { ++entry: ++ %0 = atomicrmw max i64* %p, i64 %v seq_cst ++ ret i64 %0 ++} ++ ++; CHECK-LABEL: test_load_umin_32 ++; CHECK: membar ++; CHECK: cmp ++; CHECK: movleu %icc ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw umin i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} +Index: lib/Target/Sparc/SparcInstr64Bit.td +=================================================================== +--- lib/Target/Sparc/SparcInstr64Bit.td ++++ lib/Target/Sparc/SparcInstr64Bit.td +@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S + + } // Predicates = [Is64Bit] + ++let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1, ++ Defs = [ICC] in ++multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> { ++ ++ def _32 : Pseudo<(outs IntRegs:$rd), ++ (ins ptr_rc:$addr, IntRegs:$rs2), "", ++ [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>; ++ ++ let Predicates = [Is64Bit] in ++ def _64 : Pseudo<(outs I64Regs:$rd), ++ (ins ptr_rc:$addr, I64Regs:$rs2), "", ++ [(set 
i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>; ++} ++ ++defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>; ++defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>; ++defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>; ++defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>; ++defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>; ++defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>; ++defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>; ++defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>; ++defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>; ++defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>; ++ + // Global addresses, constant pool entries + let Predicates = [Is64Bit] in { + +Index: lib/Target/Sparc/SparcISelLowering.cpp +=================================================================== +--- lib/Target/Sparc/SparcISelLowering.cpp ++++ lib/Target/Sparc/SparcISelLowering.cpp +@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons + MachineBasicBlock * + SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { +- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); +- unsigned BROpcode; +- unsigned CC; +- DebugLoc dl = MI->getDebugLoc(); +- // Figure out the conditional branch opcode to use for this select_cc. + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown SELECT_CC!"); + case SP::SELECT_CC_Int_ICC: +@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M + case SP::SELECT_CC_FP_ICC: + case SP::SELECT_CC_DFP_ICC: + case SP::SELECT_CC_QFP_ICC: +- BROpcode = SP::BCOND; +- break; ++ return expandSelectCC(MI, BB, SP::BCOND); + case SP::SELECT_CC_Int_FCC: + case SP::SELECT_CC_FP_FCC: + case SP::SELECT_CC_DFP_FCC: + case SP::SELECT_CC_QFP_FCC: +- BROpcode = SP::FBCOND; +- break; ++ return expandSelectCC(MI, BB, SP::FBCOND); ++ ++ case SP::ATOMIC_LOAD_ADD_32: ++ return expandAtomicRMW(MI, BB, SP::ADDrr); ++ case SP::ATOMIC_LOAD_ADD_64: ++ return expandAtomicRMW(MI, BB, SP::ADDXrr); ++ case SP::ATOMIC_LOAD_SUB_32: ++ return expandAtomicRMW(MI, BB, SP::SUBrr); ++ case SP::ATOMIC_LOAD_SUB_64: ++ return expandAtomicRMW(MI, BB, SP::SUBXrr); ++ case SP::ATOMIC_LOAD_AND_32: ++ return expandAtomicRMW(MI, BB, SP::ANDrr); ++ case SP::ATOMIC_LOAD_AND_64: ++ return expandAtomicRMW(MI, BB, SP::ANDXrr); ++ case SP::ATOMIC_LOAD_OR_32: ++ return expandAtomicRMW(MI, BB, SP::ORrr); ++ case SP::ATOMIC_LOAD_OR_64: ++ return expandAtomicRMW(MI, BB, SP::ORXrr); ++ case SP::ATOMIC_LOAD_XOR_32: ++ return expandAtomicRMW(MI, BB, SP::XORrr); ++ case SP::ATOMIC_LOAD_XOR_64: ++ return expandAtomicRMW(MI, BB, SP::XORXrr); ++ case SP::ATOMIC_LOAD_NAND_32: ++ return expandAtomicRMW(MI, BB, SP::ANDrr); ++ case SP::ATOMIC_LOAD_NAND_64: ++ return expandAtomicRMW(MI, BB, SP::ANDXrr); ++ ++ case SP::ATOMIC_LOAD_MAX_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G); ++ case SP::ATOMIC_LOAD_MAX_64: ++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G); ++ case SP::ATOMIC_LOAD_MIN_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE); ++ case SP::ATOMIC_LOAD_MIN_64: ++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE); ++ case SP::ATOMIC_LOAD_UMAX_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU); ++ case SP::ATOMIC_LOAD_UMAX_64: ++ return 
expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
++ case SP::ATOMIC_LOAD_UMIN_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
++ case SP::ATOMIC_LOAD_UMIN_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
+ }
++}
+
+- CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
++MachineBasicBlock*
++SparcTargetLowering::expandSelectCC(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned BROpcode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ DebugLoc dl = MI->getDebugLoc();
++ unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
+ return BB;
+ }
+
++MachineBasicBlock*
++SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *MBB,
++ unsigned Opcode,
++ unsigned CondCode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
++ DebugLoc DL = MI->getDebugLoc();
++
++ // MI is an atomic read-modify-write instruction of the form:
++ //
++ // rd = atomicrmw<op> addr, rs2
++ //
++ // All three operands are registers.
++ unsigned DestReg = MI->getOperand(0).getReg();
++ unsigned AddrReg = MI->getOperand(1).getReg();
++ unsigned Rs2Reg = MI->getOperand(2).getReg();
++
++ // SelectionDAG has already inserted memory barriers before and after MI, so
++ // we simply have to implement the operation in terms of compare-and-swap.
++ //
++ // %val0 = load %addr
++ // loop:
++ // %val = phi %val0, %dest
++ // %upd = op %val, %rs2
++ // %dest = cas %addr, %upd, %val
++ // cmp %val, %dest
++ // bne loop
++ // done:
++ //
++ bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
++ const TargetRegisterClass *ValueRC =
++ is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
++ unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
++ .addReg(AddrReg).addImm(0);
++
++ // Split the basic block MBB before MI and insert the loop block in the hole.
++ MachineFunction::iterator MFI = MBB;
++ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
++ MachineFunction *MF = MBB->getParent();
++ MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ ++MFI;
++ MF->insert(MFI, LoopMBB);
++ MF->insert(MFI, DoneMBB);
++
++ // Move MI and following instructions to DoneMBB.
++ DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
++ DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
++
++ // Connect the CFG again.
++ MBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(DoneMBB);
++
++ // Build the loop block.
++ unsigned ValReg = MRI.createVirtualRegister(ValueRC);
++ unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
++ .addReg(Val0Reg).addMBB(MBB)
++ .addReg(DestReg).addMBB(LoopMBB);
++
++ if (CondCode) {
++ // This is one of the min/max operations. We need a CMPrr followed by a
++ // MOVXCC/MOVICC.
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
++ } else {
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg);
++ }
++
++ if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
++ MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
++ unsigned TmpReg = UpdReg;
++ UpdReg = MRI.createVirtualRegister(ValueRC);
++ BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
++ }
++
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
++ .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
++ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
++ .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
++
++ MI->eraseFromParent();
++ return DoneMBB;
++}
++
+ //===----------------------------------------------------------------------===//
+ // Sparc Inline Assembly Support
+ //===----------------------------------------------------------------------===//
+Index: lib/Target/Sparc/SparcISelLowering.h
+===================================================================
+--- lib/Target/Sparc/SparcISelLowering.h
++++ lib/Target/Sparc/SparcISelLowering.h
+@@ -165,6 +165,13 @@ namespace llvm {
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const;
++
++ MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
++ unsigned BROpcode) const;
++ MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned Opcode,
++ unsigned CondCode = 0) const;
+ };
+ } // end namespace llvm
+
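Editor's note: the pseudocode comment inside expandAtomicRMW above compresses the whole expansion into a few lines. As a reading aid only (it is not part of the patch), here is a minimal C++ sketch of the same retry loop, using std::atomic's compare_exchange_weak as a portable stand-in for the CASA/CASXA instruction that the backend actually emits; the function names atomic_add_via_cas and atomic_max_via_cas are hypothetical.

    #include <atomic>
    #include <cstdint>

    // Sketch of the loop expandAtomicRMW builds around cas/casx:
    //   %val0 = load %addr
    //   loop: %val = phi %val0, %dest
    //         %upd = op %val, %rs2
    //         %dest = cas %addr, %upd, %val
    //         cmp %val, %dest ; bne loop
    uint32_t atomic_add_via_cas(std::atomic<uint32_t> &addr, uint32_t rs2) {
      uint32_t val = addr.load(std::memory_order_relaxed); // %val0
      uint32_t upd;
      do {
        upd = val + rs2; // ADDrr in the 32-bit case
        // compare_exchange_weak plays the role of "cas [%o0]": it stores upd
        // only if addr still holds val, and otherwise reloads val for a retry.
      } while (!addr.compare_exchange_weak(val, upd, std::memory_order_seq_cst));
      return val; // atomicrmw yields the value seen before the update
    }

    // The min/max flavors swap the arithmetic op for a compare plus a
    // conditional move (CMPrr followed by MOVICCrr/MOVXCCrr in the patch).
    int64_t atomic_max_via_cas(std::atomic<int64_t> &addr, int64_t rs2) {
      int64_t val = addr.load(std::memory_order_relaxed);
      int64_t upd;
      do {
        upd = val > rs2 ? val : rs2; // movg %xcc keeps the larger value
      } while (!addr.compare_exchange_weak(val, upd, std::memory_order_seq_cst));
      return val;
    }

The nand variants reuse the and path and then invert the intermediate result, which is why expandAtomicRMW emits an extra XORri with -1 before the cas and why test_load_nand_32 checks for both an and and an xor.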