Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):

  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.  These
  all use the compare-and-swap CASA/CASXA instructions.

Introduced here: http://svnweb.freebsd.org/changeset/base/262261

Index: test/CodeGen/SPARC/atomics.ll
===================================================================
--- test/CodeGen/SPARC/atomics.ll
+++ test/CodeGen/SPARC/atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: test_atomic_i32
 ; CHECK: ld [%o0]
@@ -61,3 +61,84 @@ entry:
   %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
   ret i32 %b
 }
+
+; CHECK-LABEL: test_load_add_32
+; CHECK: membar
+; CHECK: add
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw add i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_sub_64
+; CHECK: membar
+; CHECK: sub
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_xor_32
+; CHECK: membar
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_and_32
+; CHECK: membar
+; CHECK: and
+; CHECK-NOT: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw and i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_nand_32
+; CHECK: membar
+; CHECK: and
+; CHECK: xor
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
+
+; CHECK-LABEL: test_load_max_64
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movg %xcc
+; CHECK: casx [%o0]
+; CHECK: membar
+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
+entry:
+  %0 = atomicrmw max i64* %p, i64 %v seq_cst
+  ret i64 %0
+}
+
+; CHECK-LABEL: test_load_umin_32
+; CHECK: membar
+; CHECK: cmp
+; CHECK: movleu %icc
+; CHECK: cas [%o0]
+; CHECK: membar
+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
+entry:
+  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
+  ret i32 %0
+}
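For illustration only (not part of the upstream change): the new tests
cover add, sub, xor, and, nand, max, and umin. The remaining operations
(or, min, umax) lower through the same compare-and-swap loop, so a
FileCheck test for the 32-bit "or" case would be expected to look like
the following sketch (the function name test_load_or_32 is hypothetical):

; CHECK-LABEL: test_load_or_32
; CHECK: membar
; CHECK: or
; CHECK: cas [%o0]
; CHECK: membar
define zeroext i32 @test_load_or_32(i32* %p, i32 zeroext %v) {
entry:
  %0 = atomicrmw or i32* %p, i32 %v seq_cst
  ret i32 %0
}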
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
 
 } // Predicates = [Is64Bit]
 
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
+    Defs = [ICC] in
+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
+
+  def _32 : Pseudo<(outs IntRegs:$rd),
+                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
+                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
+
+  let Predicates = [Is64Bit] in
+  def _64 : Pseudo<(outs I64Regs:$rd),
+                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
+                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
+}
+
+defm ATOMIC_LOAD_ADD  : AtomicRMW<atomic_load_add_32,  atomic_load_add_64>;
+defm ATOMIC_LOAD_SUB  : AtomicRMW<atomic_load_sub_32,  atomic_load_sub_64>;
+defm ATOMIC_LOAD_AND  : AtomicRMW<atomic_load_and_32,  atomic_load_and_64>;
+defm ATOMIC_LOAD_OR   : AtomicRMW<atomic_load_or_32,   atomic_load_or_64>;
+defm ATOMIC_LOAD_XOR  : AtomicRMW<atomic_load_xor_32,  atomic_load_xor_64>;
+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
+defm ATOMIC_LOAD_MIN  : AtomicRMW<atomic_load_min_32,  atomic_load_min_64>;
+defm ATOMIC_LOAD_MAX  : AtomicRMW<atomic_load_max_32,  atomic_load_max_64>;
+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
+
 // Global addresses, constant pool entries
 let Predicates = [Is64Bit] in {
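For illustration only (not part of the upstream change): these Pseudo
instructions carry no encoding; usesCustomInserter = 1 hands them to
EmitInstrWithCustomInserter after instruction selection. For example,
the _32 pattern matches IR such as

  %0 = atomicrmw add i32* %p, i32 %v seq_cst

and selects it to an ATOMIC_LOAD_ADD_32 pseudo, which the custom
inserter below rewrites into a compare-and-swap loop.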
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
 MachineBasicBlock *
 SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
-  unsigned BROpcode;
-  unsigned CC;
-  DebugLoc dl = MI->getDebugLoc();
-  // Figure out the conditional branch opcode to use for this select_cc.
   switch (MI->getOpcode()) {
   default: llvm_unreachable("Unknown SELECT_CC!");
   case SP::SELECT_CC_Int_ICC:
@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   case SP::SELECT_CC_FP_ICC:
   case SP::SELECT_CC_DFP_ICC:
   case SP::SELECT_CC_QFP_ICC:
-    BROpcode = SP::BCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::BCOND);
   case SP::SELECT_CC_Int_FCC:
   case SP::SELECT_CC_FP_FCC:
   case SP::SELECT_CC_DFP_FCC:
   case SP::SELECT_CC_QFP_FCC:
-    BROpcode = SP::FBCOND;
-    break;
+    return expandSelectCC(MI, BB, SP::FBCOND);
+
+  case SP::ATOMIC_LOAD_ADD_32:
+    return expandAtomicRMW(MI, BB, SP::ADDrr);
+  case SP::ATOMIC_LOAD_ADD_64:
+    return expandAtomicRMW(MI, BB, SP::ADDXrr);
+  case SP::ATOMIC_LOAD_SUB_32:
+    return expandAtomicRMW(MI, BB, SP::SUBrr);
+  case SP::ATOMIC_LOAD_SUB_64:
+    return expandAtomicRMW(MI, BB, SP::SUBXrr);
+  case SP::ATOMIC_LOAD_AND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_AND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+  case SP::ATOMIC_LOAD_OR_32:
+    return expandAtomicRMW(MI, BB, SP::ORrr);
+  case SP::ATOMIC_LOAD_OR_64:
+    return expandAtomicRMW(MI, BB, SP::ORXrr);
+  case SP::ATOMIC_LOAD_XOR_32:
+    return expandAtomicRMW(MI, BB, SP::XORrr);
+  case SP::ATOMIC_LOAD_XOR_64:
+    return expandAtomicRMW(MI, BB, SP::XORXrr);
+  case SP::ATOMIC_LOAD_NAND_32:
+    return expandAtomicRMW(MI, BB, SP::ANDrr);
+  case SP::ATOMIC_LOAD_NAND_64:
+    return expandAtomicRMW(MI, BB, SP::ANDXrr);
+
+  case SP::ATOMIC_LOAD_MAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
+  case SP::ATOMIC_LOAD_MIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_MIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
+  case SP::ATOMIC_LOAD_UMAX_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMAX_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
+  case SP::ATOMIC_LOAD_UMIN_32:
+    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
+  case SP::ATOMIC_LOAD_UMIN_64:
+    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
   }
+}
 
-  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+MachineBasicBlock*
+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
+                                    MachineBasicBlock *BB,
+                                    unsigned BROpcode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 
   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
   // control-flow pattern.  The incoming instruction knows the destination vreg
@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
   return BB;
 }
 
+MachineBasicBlock*
+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     unsigned Opcode,
+                                     unsigned CondCode) const {
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // MI is an atomic read-modify-write instruction of the form:
+  //
+  //   rd = atomicrmw<op> addr, rs2
+  //
+  // All three operands are registers.
+  unsigned DestReg = MI->getOperand(0).getReg();
+  unsigned AddrReg = MI->getOperand(1).getReg();
+  unsigned Rs2Reg  = MI->getOperand(2).getReg();
+
+  // SelectionDAG has already inserted memory barriers before and after MI, so
+  // we simply have to implement the operation in terms of compare-and-swap.
+  //
+  //   %val0 = load %addr
+  // loop:
+  //   %val = phi %val0, %dest
+  //   %upd = op %val, %rs2
+  //   %dest = cas %addr, %upd, %val
+  //   cmp %val, %dest
+  //   bne loop
+  // done:
+  //
+  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
+  const TargetRegisterClass *ValueRC =
+    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
+    .addReg(AddrReg).addImm(0);
+
+  // Split the basic block MBB before MI and insert the loop block in the hole.
+  MachineFunction::iterator MFI = MBB;
+  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+  MachineFunction *MF = MBB->getParent();
+  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  ++MFI;
+  MF->insert(MFI, LoopMBB);
+  MF->insert(MFI, DoneMBB);
+
+  // Move MI and following instructions to DoneMBB.
+  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
+  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+  // Connect the CFG again.
+  MBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+
+  // Build the loop block.
+  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
+  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
+
+  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
+    .addReg(Val0Reg).addMBB(MBB)
+    .addReg(DestReg).addMBB(LoopMBB);
+
+  if (CondCode) {
+    // This is one of the min/max operations. We need a CMPrr followed by a
+    // MOVXCC/MOVICC.
+    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
+  } else {
+    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+      .addReg(ValReg).addReg(Rs2Reg);
+  }
+
+  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
+      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
+    unsigned TmpReg = UpdReg;
+    UpdReg = MRI.createVirtualRegister(ValueRC);
+    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
+  }
+
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
+    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
+  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
+    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
+
+  MI->eraseFromParent();
+  return DoneMBB;
+}
+
 //===----------------------------------------------------------------------===//
 // Sparc Inline Assembly Support
 //===----------------------------------------------------------------------===//
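Two details worth noting as illustrative commentary (not part of the
upstream change). First, the NAND cases deliberately dispatch to the
plain AND opcodes: LLVM defines "atomicrmw nand" as ~(x & v), and the
inversion is supplied by the extra XORri with -1 in expandAtomicRMW, so
the update value is computed as in this IR fragment (register names
%val and %v are illustrative):

  %and = and i32 %val, %v
  %upd = xor i32 %and, -1      ; %upd == ~(%val & %v)

Second, for the min/max forms the CondCode argument selects the
conditional move, so a signed 32-bit min (ICC_LE) should produce a
cmp/movle %icc/cas sequence analogous to the movleu %icc checked by
test_load_umin_32 above.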
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -165,6 +165,13 @@ namespace llvm {
     virtual void ReplaceNodeResults(SDNode *N,
                                     SmallVectorImpl<SDValue>& Results,
                                     SelectionDAG &DAG) const;
+
+    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
+                                      unsigned BROpcode) const;
+    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
+                                       MachineBasicBlock *BB,
+                                       unsigned Opcode,
+                                       unsigned CondCode = 0) const;
   };
 } // end namespace llvm
 
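For illustration only (not part of the upstream change): under lit, the
updated RUN line is equivalent to invoking, from the llvm source tree,

  llc < test/CodeGen/SPARC/atomics.ll -march=sparcv9 -verify-machineinstrs \
    | FileCheck test/CodeGen/SPARC/atomics.ll

so the machine verifier now checks the CAS-loop blocks created by
expandAtomicRMW on every run of the test.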