Diffstat (limited to 'contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff')
-rw-r--r-- | contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff | 344 |
1 file changed, 344 insertions, 0 deletions
diff --git a/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff new file mode 100644 index 0000000..b1dec41 --- /dev/null +++ b/contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff @@ -0,0 +1,344 @@ +Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen): + + Implement atomicrmw operations in 32 and 64 bits for SPARCv9. + + These all use the compare-and-swap CASA/CASXA instructions. + +Introduced here: http://svn.freebsd.org/changeset/base/262261 + +Index: test/CodeGen/SPARC/atomics.ll +=================================================================== +--- test/CodeGen/SPARC/atomics.ll ++++ test/CodeGen/SPARC/atomics.ll +@@ -1,4 +1,4 @@ +-; RUN: llc < %s -march=sparcv9 | FileCheck %s ++; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s + + ; CHECK-LABEL: test_atomic_i32 + ; CHECK: ld [%o0] +@@ -61,3 +61,84 @@ entry: + %b = atomicrmw xchg i32* %ptr, i32 42 monotonic + ret i32 %b + } ++ ++; CHECK-LABEL: test_load_add_32 ++; CHECK: membar ++; CHECK: add ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw add i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_sub_64 ++; CHECK: membar ++; CHECK: sub ++; CHECK: casx [%o0] ++; CHECK: membar ++define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) { ++entry: ++ %0 = atomicrmw sub i64* %p, i64 %v seq_cst ++ ret i64 %0 ++} ++ ++; CHECK-LABEL: test_load_xor_32 ++; CHECK: membar ++; CHECK: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw xor i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_and_32 ++; CHECK: membar ++; CHECK: and ++; CHECK-NOT: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw and i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_nand_32 ++; CHECK: membar ++; CHECK: and ++; CHECK: xor ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw nand i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; CHECK-LABEL: test_load_max_64 ++; CHECK: membar ++; CHECK: cmp ++; CHECK: movg %xcc ++; CHECK: casx [%o0] ++; CHECK: membar ++define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) { ++entry: ++ %0 = atomicrmw max i64* %p, i64 %v seq_cst ++ ret i64 %0 ++} ++ ++; CHECK-LABEL: test_load_umin_32 ++; CHECK: membar ++; CHECK: cmp ++; CHECK: movleu %icc ++; CHECK: cas [%o0] ++; CHECK: membar ++define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) { ++entry: ++ %0 = atomicrmw umin i32* %p, i32 %v seq_cst ++ ret i32 %0 ++} +Index: lib/Target/Sparc/SparcInstr64Bit.td +=================================================================== +--- lib/Target/Sparc/SparcInstr64Bit.td ++++ lib/Target/Sparc/SparcInstr64Bit.td +@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S + + } // Predicates = [Is64Bit] + ++let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1, ++ Defs = [ICC] in ++multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> { ++ ++ def _32 : Pseudo<(outs IntRegs:$rd), ++ (ins ptr_rc:$addr, IntRegs:$rs2), "", ++ [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>; ++ ++ let Predicates = [Is64Bit] in ++ def _64 : Pseudo<(outs I64Regs:$rd), ++ (ins ptr_rc:$addr, I64Regs:$rs2), "", ++ [(set 
i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>; ++} ++ ++defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>; ++defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>; ++defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>; ++defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>; ++defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>; ++defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>; ++defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>; ++defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>; ++defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>; ++defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>; ++ + // Global addresses, constant pool entries + let Predicates = [Is64Bit] in { + +Index: lib/Target/Sparc/SparcISelLowering.cpp +=================================================================== +--- lib/Target/Sparc/SparcISelLowering.cpp ++++ lib/Target/Sparc/SparcISelLowering.cpp +@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons + MachineBasicBlock * + SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { +- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); +- unsigned BROpcode; +- unsigned CC; +- DebugLoc dl = MI->getDebugLoc(); +- // Figure out the conditional branch opcode to use for this select_cc. + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown SELECT_CC!"); + case SP::SELECT_CC_Int_ICC: +@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M + case SP::SELECT_CC_FP_ICC: + case SP::SELECT_CC_DFP_ICC: + case SP::SELECT_CC_QFP_ICC: +- BROpcode = SP::BCOND; +- break; ++ return expandSelectCC(MI, BB, SP::BCOND); + case SP::SELECT_CC_Int_FCC: + case SP::SELECT_CC_FP_FCC: + case SP::SELECT_CC_DFP_FCC: + case SP::SELECT_CC_QFP_FCC: +- BROpcode = SP::FBCOND; +- break; ++ return expandSelectCC(MI, BB, SP::FBCOND); ++ ++ case SP::ATOMIC_LOAD_ADD_32: ++ return expandAtomicRMW(MI, BB, SP::ADDrr); ++ case SP::ATOMIC_LOAD_ADD_64: ++ return expandAtomicRMW(MI, BB, SP::ADDXrr); ++ case SP::ATOMIC_LOAD_SUB_32: ++ return expandAtomicRMW(MI, BB, SP::SUBrr); ++ case SP::ATOMIC_LOAD_SUB_64: ++ return expandAtomicRMW(MI, BB, SP::SUBXrr); ++ case SP::ATOMIC_LOAD_AND_32: ++ return expandAtomicRMW(MI, BB, SP::ANDrr); ++ case SP::ATOMIC_LOAD_AND_64: ++ return expandAtomicRMW(MI, BB, SP::ANDXrr); ++ case SP::ATOMIC_LOAD_OR_32: ++ return expandAtomicRMW(MI, BB, SP::ORrr); ++ case SP::ATOMIC_LOAD_OR_64: ++ return expandAtomicRMW(MI, BB, SP::ORXrr); ++ case SP::ATOMIC_LOAD_XOR_32: ++ return expandAtomicRMW(MI, BB, SP::XORrr); ++ case SP::ATOMIC_LOAD_XOR_64: ++ return expandAtomicRMW(MI, BB, SP::XORXrr); ++ case SP::ATOMIC_LOAD_NAND_32: ++ return expandAtomicRMW(MI, BB, SP::ANDrr); ++ case SP::ATOMIC_LOAD_NAND_64: ++ return expandAtomicRMW(MI, BB, SP::ANDXrr); ++ ++ case SP::ATOMIC_LOAD_MAX_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G); ++ case SP::ATOMIC_LOAD_MAX_64: ++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G); ++ case SP::ATOMIC_LOAD_MIN_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE); ++ case SP::ATOMIC_LOAD_MIN_64: ++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE); ++ case SP::ATOMIC_LOAD_UMAX_32: ++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU); ++ case SP::ATOMIC_LOAD_UMAX_64: ++ return 
expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
++ case SP::ATOMIC_LOAD_UMIN_32:
++ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
++ case SP::ATOMIC_LOAD_UMIN_64:
++ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
+ }
++}
+
+- CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
++MachineBasicBlock*
++SparcTargetLowering::expandSelectCC(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned BROpcode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ DebugLoc dl = MI->getDebugLoc();
++ unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+
+ // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
+ // control-flow pattern. The incoming instruction knows the destination vreg
+@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
+ return BB;
+ }
+
++MachineBasicBlock*
++SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *MBB,
++ unsigned Opcode,
++ unsigned CondCode) const {
++ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
++ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
++ DebugLoc DL = MI->getDebugLoc();
++
++ // MI is an atomic read-modify-write instruction of the form:
++ //
++ // rd = atomicrmw<op> addr, rs2
++ //
++ // All three operands are registers.
++ unsigned DestReg = MI->getOperand(0).getReg();
++ unsigned AddrReg = MI->getOperand(1).getReg();
++ unsigned Rs2Reg = MI->getOperand(2).getReg();
++
++ // SelectionDAG has already inserted memory barriers before and after MI, so
++ // we simply have to implement the operation in terms of compare-and-swap.
++ //
++ // %val0 = load %addr
++ // loop:
++ // %val = phi %val0, %dest
++ // %upd = op %val, %rs2
++ // %dest = cas %addr, %upd, %val
++ // cmp %val, %dest
++ // bne loop
++ // done:
++ //
++ bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
++ const TargetRegisterClass *ValueRC =
++ is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
++ unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
++ .addReg(AddrReg).addImm(0);
++
++ // Split the basic block MBB before MI and insert the loop block in the hole.
++ MachineFunction::iterator MFI = MBB;
++ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
++ MachineFunction *MF = MBB->getParent();
++ MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
++ ++MFI;
++ MF->insert(MFI, LoopMBB);
++ MF->insert(MFI, DoneMBB);
++
++ // Move MI and following instructions to DoneMBB.
++ DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
++ DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
++
++ // Connect the CFG again.
++ MBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(LoopMBB);
++ LoopMBB->addSuccessor(DoneMBB);
++
++ // Build the loop block.
++ unsigned ValReg = MRI.createVirtualRegister(ValueRC);
++ unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
++
++ BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
++ .addReg(Val0Reg).addMBB(MBB)
++ .addReg(DestReg).addMBB(LoopMBB);
++
++ if (CondCode) {
++ // This is one of the min/max operations. We need a CMPrr followed by a
++ // MOVXCC/MOVICC.
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
++ } else {
++ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
++ .addReg(ValReg).addReg(Rs2Reg);
++ }
++
++ if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
++ MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
++ unsigned TmpReg = UpdReg;
++ UpdReg = MRI.createVirtualRegister(ValueRC);
++ BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
++ }
++
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
++ .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
++ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
++ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
++ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
++ .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
++
++ MI->eraseFromParent();
++ return DoneMBB;
++}
++
+ //===----------------------------------------------------------------------===//
+ // Sparc Inline Assembly Support
+ //===----------------------------------------------------------------------===//
+Index: lib/Target/Sparc/SparcISelLowering.h
+===================================================================
+--- lib/Target/Sparc/SparcISelLowering.h
++++ lib/Target/Sparc/SparcISelLowering.h
+@@ -165,6 +165,13 @@ namespace llvm {
+ virtual void ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>& Results,
+ SelectionDAG &DAG) const;
++
++ MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
++ unsigned BROpcode) const;
++ MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
++ MachineBasicBlock *BB,
++ unsigned Opcode,
++ unsigned CondCode = 0) const;
+ };
+ } // end namespace llvm
+
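Editor's note: the pseudocode comment inside expandAtomicRMW above compresses the whole expansion into a few lines. As a reading aid only (it is not part of the patch), here is a minimal C++ sketch of the same retry loop, using std::atomic's compare_exchange_weak as a portable stand-in for the CASA/CASXA instruction that the backend actually emits; the function names atomic_add_via_cas and atomic_max_via_cas are hypothetical.

    #include <atomic>
    #include <cstdint>

    // Sketch of the loop expandAtomicRMW builds around cas/casx:
    //   %val0 = load %addr
    //   loop: %val = phi %val0, %dest
    //         %upd = op %val, %rs2
    //         %dest = cas %addr, %upd, %val
    //         cmp %val, %dest ; bne loop
    uint32_t atomic_add_via_cas(std::atomic<uint32_t> &addr, uint32_t rs2) {
      uint32_t val = addr.load(std::memory_order_relaxed); // %val0
      uint32_t upd;
      do {
        upd = val + rs2; // ADDrr in the 32-bit case
        // compare_exchange_weak plays the role of "cas [%o0]": it stores upd
        // only if addr still holds val, and otherwise reloads val for a retry.
      } while (!addr.compare_exchange_weak(val, upd, std::memory_order_seq_cst));
      return val; // atomicrmw yields the value seen before the update
    }

    // The min/max flavors swap the arithmetic op for a compare plus a
    // conditional move (CMPrr followed by MOVICCrr/MOVXCCrr in the patch).
    int64_t atomic_max_via_cas(std::atomic<int64_t> &addr, int64_t rs2) {
      int64_t val = addr.load(std::memory_order_relaxed);
      int64_t upd;
      do {
        upd = val > rs2 ? val : rs2; // movg %xcc keeps the larger value
      } while (!addr.compare_exchange_weak(val, upd, std::memory_order_seq_cst));
      return val;
    }

The nand variants reuse the and path and then invert the intermediate result, which is why expandAtomicRMW emits an extra XORri with -1 before the cas and why test_load_nand_32 checks for both an and and an xor.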