Diffstat (limited to 'contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff')
-rw-r--r--  contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff  344
1 file changed, 344 insertions, 0 deletions
diff --git a/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff
new file mode 100644
index 0000000..b1dec41
--- /dev/null
+++ b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff
@@ -0,0 +1,344 @@
+Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):
+
+ Implement atomicrmw operations in 32 and 64 bits for SPARCv9.
+
+ These all use the compare-and-swap CASA/CASXA instructions.
+
+Introduced here: http://svn.freebsd.org/changeset/base/262261
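+
+For example, lowering the IR
+
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+
+emits a membar, the add operation, a "cas [%o0]" retry loop, and a closing
+membar, as checked by the new tests below. The shared expansion (a sketch
+taken from the comment in expandAtomicRMW(); register names are
+illustrative) is:
+
+  %val0 = load %addr
+  loop:
+    %val = phi %val0, %dest
+    %upd = op %val, %rs2
+    %dest = cas %addr, %upd, %val
+    cmp %val, %dest
+    bne loop
+  done: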
+
+Index: test/CodeGen/SPARC/atomics.ll
+===================================================================
+--- test/CodeGen/SPARC/atomics.ll
++++ test/CodeGen/SPARC/atomics.ll
+@@ -1,4 +1,4 @@
+-; RUN: llc < %s -march=sparcv9 | FileCheck %s
++; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
+
+ ; CHECK-LABEL: test_atomic_i32
+ ; CHECK: ld [%o0]
+@@ -61,3 +61,84 @@ entry:
+ %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
+ ret i32 %b
+ }
++
++; CHECK-LABEL: test_load_add_32
++; CHECK: membar
++; CHECK: add
++; CHECK: cas [%o0]
++; CHECK: membar
++define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
++entry:
++ %0 = atomicrmw add i32* %p, i32 %v seq_cst
++ ret i32 %0
++}
++
++; CHECK-LABEL: test_load_sub_64
++; CHECK: membar
++; CHECK: sub
++; CHECK: casx [%o0]
++; CHECK: membar
++define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
++entry:
++ %0 = atomicrmw sub i64* %p, i64 %v seq_cst
++ ret i64 %0
++}
++
++; CHECK-LABEL: test_load_xor_32
++; CHECK: membar
++; CHECK: xor
++; CHECK: cas [%o0]
++; CHECK: membar
++define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
++entry:
++ %0 = atomicrmw xor i32* %p, i32 %v seq_cst
++ ret i32 %0
++}
++
++; CHECK-LABEL: test_load_and_32
++; CHECK: membar
++; CHECK: and
++; CHECK-NOT: xor
++; CHECK: cas [%o0]
++; CHECK: membar
++define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
++entry:
++ %0 = atomicrmw and i32* %p, i32 %v seq_cst
++ ret i32 %0
++}
++
++; CHECK-LABEL: test_load_nand_32
++; CHECK: membar
++; CHECK: and
++; CHECK: xor
++; CHECK: cas [%o0]
++; CHECK: membar
++define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
++entry:
++ %0 = atomicrmw nand i32* %p, i32 %v seq_cst
++ ret i32 %0
++}
++
++; CHECK-LABEL: test_load_max_64
++; CHECK: membar
++; CHECK: cmp
++; CHECK: movg %xcc
++; CHECK: casx [%o0]
++; CHECK: membar
++define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
++entry:
++ %0 = atomicrmw max i64* %p, i64 %v seq_cst
++ ret i64 %0
++}
++
++; CHECK-LABEL: test_load_umin_32
++; CHECK: membar
++; CHECK: cmp
++; CHECK: movleu %icc
++; CHECK: cas [%o0]
++; CHECK: membar
++define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
++entry:
++ %0 = atomicrmw umin i32* %p, i32 %v seq_cst
++ ret i32 %0
++}
+Index: lib/Target/Sparc/SparcInstr64Bit.td
+===================================================================
+--- lib/Target/Sparc/SparcInstr64Bit.td
++++ lib/Target/Sparc/SparcInstr64Bit.td
+@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
+
+ } // Predicates = [Is64Bit]
+
++let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
++ Defs = [ICC] in
++multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
++
++ def _32 : Pseudo<(outs IntRegs:$rd),
++ (ins ptr_rc:$addr, IntRegs:$rs2), "",
++ [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
++
++ let Predicates = [Is64Bit] in
++ def _64 : Pseudo<(outs I64Regs:$rd),
++ (ins ptr_rc:$addr, I64Regs:$rs2), "",
++ [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
++}
++
++defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>;
++defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>;
++defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>;
++defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>;
++defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>;
++defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
++defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>;
++defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>;
++defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
++defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
++
+ // Global addresses, constant pool entries
+ let Predicates = [Is64Bit] in {
+
+Index: lib/Target/Sparc/SparcISelLowering.cpp
+===================================================================
+--- lib/Target/Sparc/SparcISelLowering.cpp
++++ lib/Target/Sparc/SparcISelLowering.cpp
+@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
+ MachineBasicBlock *
+ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+- unsigned BROpcode;
+- unsigned CC;
+- DebugLoc dl = MI->getDebugLoc();
+- // Figure out the conditional branch opcode to use for this select_cc.
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Unknown SELECT_CC!");
+ case SP::SELECT_CC_Int_ICC:
+@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
+ case SP::SELECT_CC_FP_ICC:
+ case SP::SELECT_CC_DFP_ICC:
+ case SP::SELECT_CC_QFP_ICC:
+- BROpcode = SP::BCOND;
+- break;
++ return expandSelectCC(MI, BB, SP::BCOND);
+ case SP::SELECT_CC_Int_FCC:
+ case SP::SELECT_CC_FP_FCC:
+ case SP::SELECT_CC_DFP_FCC:
+ case SP::SELECT_CC_QFP_FCC:
+- BROpcode = SP::FBCOND;
+- break;
++ return expandSelectCC(MI, BB, SP::FBCOND);
++
++ case SP::ATOMIC_LOAD_ADD_32:
++ return expandAtomicRMW(MI, BB, SP::ADDrr);
++ case SP::ATOMIC_LOAD_ADD_64:
++ return expandAtomicRMW(MI, BB, SP::ADDXrr);
++ case SP::ATOMIC_LOAD_SUB_32:
++ return expandAtomicRMW(MI, BB, SP::SUBrr);
++ case SP::ATOMIC_LOAD_SUB_64:
++ return expandAtomicRMW(MI, BB, SP::SUBXrr);
++ case SP::ATOMIC_LOAD_AND_32:
++ return expandAtomicRMW(MI, BB, SP::ANDrr);
++ case SP::ATOMIC_LOAD_AND_64:
++ return expandAtomicRMW(MI, BB, SP::ANDXrr);
++ case SP::ATOMIC_LOAD_OR_32:
++ return expandAtomicRMW(MI, BB, SP::ORrr);
++ case SP::ATOMIC_LOAD_OR_64:
++ return expandAtomicRMW(MI, BB, SP::ORXrr);
++ case SP::ATOMIC_LOAD_XOR_32:
++ return expandAtomicRMW(MI, BB, SP::XORrr);
++ case SP::ATOMIC_LOAD_XOR_64:
++ return expandAtomicRMW(MI, BB, SP::XORXrr);
++ case SP::ATOMIC_LOAD_NAND_32:
++ return expandAtomicRMW(MI, BB, SP::ANDrr);
++ case SP::ATOMIC_LOAD_NAND_64:
++ return expandAtomicRMW(MI, BB, SP::ANDXrr);
++
++ case SP::ATOMIC_LOAD_MAX_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
++ case SP::ATOMIC_LOAD_MAX_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
++ case SP::ATOMIC_LOAD_MIN_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
++ case SP::ATOMIC_LOAD_MIN_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
++ case SP::ATOMIC_LOAD_UMAX_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
++ case SP::ATOMIC_LOAD_UMAX_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
++ case SP::ATOMIC_LOAD_UMIN_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
++ case SP::ATOMIC_LOAD_UMIN_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
+ }
++}
+
+- CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
++MachineBasicBlock*
++SparcTargetLowering::expandSelectCC(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned BROpcode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ DebugLoc dl = MI->getDebugLoc();
++ unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
+ return BB;
+ }
+
++MachineBasicBlock*
++SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *MBB,
++ unsigned Opcode,
++ unsigned CondCode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
++ DebugLoc DL = MI->getDebugLoc();
++
++ // MI is an atomic read-modify-write instruction of the form:
++ //
++ // rd = atomicrmw<op> addr, rs2
++ //
++ // All three operands are registers.
++ unsigned DestReg = MI->getOperand(0).getReg();
++ unsigned AddrReg = MI->getOperand(1).getReg();
++ unsigned Rs2Reg = MI->getOperand(2).getReg();
++
++ // SelectionDAG has already inserted memory barriers before and after MI, so
++ // we simply have to implement the operation in terms of compare-and-swap.
++ //
++ // %val0 = load %addr
++ // loop:
++ // %val = phi %val0, %dest
++ // %upd = op %val, %rs2
++ // %dest = cas %addr, %upd, %val
++ // cmp %val, %dest
++ // bne loop
++ // done:
++ //
++ bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
++ const TargetRegisterClass *ValueRC =
++ is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
++ unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
++ .addReg(AddrReg).addImm(0);
++
++ // Split the basic block MBB before MI and insert the loop block in the hole.
++ MachineFunction::iterator MFI = MBB;
++ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
++ MachineFunction *MF = MBB->getParent();
++ MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ ++MFI;
++ MF->insert(MFI, LoopMBB);
++ MF->insert(MFI, DoneMBB);
++
++ // Move MI and following instructions to DoneMBB.
++ DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
++ DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
++
++ // Connect the CFG again.
++ MBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(DoneMBB);
++
++ // Build the loop block.
++ unsigned ValReg = MRI.createVirtualRegister(ValueRC);
++ unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
++ .addReg(Val0Reg).addMBB(MBB)
++ .addReg(DestReg).addMBB(LoopMBB);
++
++ if (CondCode) {
++ // This is one of the min/max operations. We need a CMPrr followed by a
++ // MOVXCC/MOVICC.
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
++ } else {
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg);
++ }
++
++ if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
++ MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
++ unsigned TmpReg = UpdReg;
++ UpdReg = MRI.createVirtualRegister(ValueRC);
++ BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
++ }
++
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
++ .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
++ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
++ .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
++
++ MI->eraseFromParent();
++ return DoneMBB;
++}
++
+ //===----------------------------------------------------------------------===//
+ // Sparc Inline Assembly Support
+ //===----------------------------------------------------------------------===//
+Index: lib/Target/Sparc/SparcISelLowering.h
+===================================================================
+--- lib/Target/Sparc/SparcISelLowering.h
++++ lib/Target/Sparc/SparcISelLowering.h
+@@ -165,6 +165,13 @@ namespace llvm {
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const;
++
++ MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
++ unsigned BROpcode) const;
++ MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned Opcode,
++ unsigned CondCode = 0) const;
+ };
+ } // end namespace llvm
+