Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):

  Implement atomicrmw operations in 32 and 64 bits for SPARCv9.

  These all use the compare-and-swap CASA/CASXA instructions.

Introduced here: http://svn.freebsd.org/changeset/base/262261
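
For reference, each atomicrmw operation is expanded into a compare-and-swap
retry loop around CASA/CASXA.  The sketch below shows roughly what llc ends up
emitting for the 32-bit add case exercised by the new tests; the register
assignments, label name and delay-slot filling are illustrative only, and the
membar barriers that SelectionDAG inserts around the operation are omitted:

    ! %o0 = %p, %o1 = %v
            ld      [%o0], %o2        ! load the current value of *%p
    .LBB0_1:
            add     %o2, %o1, %o3     ! apply the operation: %o3 = %o2 + %v
            cas     [%o0], %o2, %o3   ! if *%p == %o2, store %o3; %o3 = old *%p
            cmp     %o2, %o3          ! did memory still hold the expected value?
            bne     .LBB0_1           ! no: another store intervened, retry
            mov     %o3, %o2          ! (delay slot) retry with the observed value
    ! on fall-through, %o3 holds the original value, i.e. the atomicrmw result

The 64-bit variants use ldx/casx and branch on %xcc instead of %icc.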
Index: test/CodeGen/SPARC/atomics.ll
===================================================================
--- test/CodeGen/SPARC/atomics.ll
+++ test/CodeGen/SPARC/atomics.ll
-; RUN: llc < %s -march=sparcv9 | FileCheck %s
+; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: test_atomic_i32
@@ -61,3 +61,84 @@ entry:
%b = atomicrmw xchg i32* %ptr, i32 42 monotonic
+; CHECK-LABEL: test_load_add_32
+define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
+ %0 = atomicrmw add i32* %p, i32 %v seq_cst
+; CHECK-LABEL: test_load_sub_64
+define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
+ %0 = atomicrmw sub i64* %p, i64 %v seq_cst
+; CHECK-LABEL: test_load_xor_32
+define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
+ %0 = atomicrmw xor i32* %p, i32 %v seq_cst
+; CHECK-LABEL: test_load_and_32
+define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
+ %0 = atomicrmw and i32* %p, i32 %v seq_cst
+; CHECK-LABEL: test_load_nand_32
+define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
+ %0 = atomicrmw nand i32* %p, i32 %v seq_cst
+; CHECK-LABEL: test_load_max_64
+define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
+ %0 = atomicrmw max i64* %p, i64 %v seq_cst
+; CHECK-LABEL: test_load_umin_32
+define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
+ %0 = atomicrmw umin i32* %p, i32 %v seq_cst
Index: lib/Target/Sparc/SparcInstr64Bit.td
===================================================================
--- lib/Target/Sparc/SparcInstr64Bit.td
+++ lib/Target/Sparc/SparcInstr64Bit.td
@@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
} // Predicates = [Is64Bit]
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
+multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
+ def _32 : Pseudo<(outs IntRegs:$rd),
+ (ins ptr_rc:$addr, IntRegs:$rs2), "",
+ [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
+ let Predicates = [Is64Bit] in
+ def _64 : Pseudo<(outs I64Regs:$rd),
+ (ins ptr_rc:$addr, I64Regs:$rs2), "",
+ [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
+defm ATOMIC_LOAD_ADD : AtomicRMW<atomic_load_add_32, atomic_load_add_64>;
+defm ATOMIC_LOAD_SUB : AtomicRMW<atomic_load_sub_32, atomic_load_sub_64>;
+defm ATOMIC_LOAD_AND : AtomicRMW<atomic_load_and_32, atomic_load_and_64>;
+defm ATOMIC_LOAD_OR : AtomicRMW<atomic_load_or_32, atomic_load_or_64>;
+defm ATOMIC_LOAD_XOR : AtomicRMW<atomic_load_xor_32, atomic_load_xor_64>;
+defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
+defm ATOMIC_LOAD_MIN : AtomicRMW<atomic_load_min_32, atomic_load_min_64>;
+defm ATOMIC_LOAD_MAX : AtomicRMW<atomic_load_max_32, atomic_load_max_64>;
+defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
+defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
// Global addresses, constant pool entries
let Predicates = [Is64Bit] in {
Index: lib/Target/Sparc/SparcISelLowering.cpp
===================================================================
--- lib/Target/Sparc/SparcISelLowering.cpp
+++ lib/Target/Sparc/SparcISelLowering.cpp
@@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- // Figure out the conditional branch opcode to use for this select_cc.
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SELECT_CC!");
case SP::SELECT_CC_Int_ICC:
@@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
case SP::SELECT_CC_QFP_ICC:
- BROpcode = SP::BCOND;
+ return expandSelectCC(MI, BB, SP::BCOND);
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
- BROpcode = SP::FBCOND;
+ return expandSelectCC(MI, BB, SP::FBCOND);
+ case SP::ATOMIC_LOAD_ADD_32:
+ return expandAtomicRMW(MI, BB, SP::ADDrr);
+ case SP::ATOMIC_LOAD_ADD_64:
+ return expandAtomicRMW(MI, BB, SP::ADDXrr);
+ case SP::ATOMIC_LOAD_SUB_32:
+ return expandAtomicRMW(MI, BB, SP::SUBrr);
+ case SP::ATOMIC_LOAD_SUB_64:
+ return expandAtomicRMW(MI, BB, SP::SUBXrr);
+ case SP::ATOMIC_LOAD_AND_32:
+ return expandAtomicRMW(MI, BB, SP::ANDrr);
+ case SP::ATOMIC_LOAD_AND_64:
+ return expandAtomicRMW(MI, BB, SP::ANDXrr);
+ case SP::ATOMIC_LOAD_OR_32:
+ return expandAtomicRMW(MI, BB, SP::ORrr);
+ case SP::ATOMIC_LOAD_OR_64:
+ return expandAtomicRMW(MI, BB, SP::ORXrr);
+ case SP::ATOMIC_LOAD_XOR_32:
+ return expandAtomicRMW(MI, BB, SP::XORrr);
+ case SP::ATOMIC_LOAD_XOR_64:
+ return expandAtomicRMW(MI, BB, SP::XORXrr);
+ case SP::ATOMIC_LOAD_NAND_32:
+ return expandAtomicRMW(MI, BB, SP::ANDrr);
+ case SP::ATOMIC_LOAD_NAND_64:
+ return expandAtomicRMW(MI, BB, SP::ANDXrr);
+ case SP::ATOMIC_LOAD_MAX_32:
+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
+ case SP::ATOMIC_LOAD_MAX_64:
+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
+ case SP::ATOMIC_LOAD_MIN_32:
+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
+ case SP::ATOMIC_LOAD_MIN_64:
+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
+ case SP::ATOMIC_LOAD_UMAX_32:
+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
+ case SP::ATOMIC_LOAD_UMAX_64:
+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
+ case SP::ATOMIC_LOAD_UMIN_32:
+ return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
+ case SP::ATOMIC_LOAD_UMIN_64:
+ return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
- CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
+SparcTargetLowering::expandSelectCC(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned BROpcode) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
// To "insert" a SELECT_CC instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
@@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
+SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned CondCode) const {
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ // MI is an atomic read-modify-write instruction of the form:
+ // rd = atomicrmw<op> addr, rs2
+ // All three operands are registers.
+ unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned AddrReg = MI->getOperand(1).getReg();
+ unsigned Rs2Reg = MI->getOperand(2).getReg();
+ // SelectionDAG has already inserted memory barriers before and after MI, so
+ // we simply have to implement the operation in terms of compare-and-swap.
+ // %val0 = load %addr
+ // %val = phi %val0, %dest
+ // %upd = op %val, %rs2
+ // %dest = cas %addr, %upd, %val
+ bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
+ const TargetRegisterClass *ValueRC =
+ is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
+ unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
+ BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
+ .addReg(AddrReg).addImm(0);
+ // Split the basic block MBB before MI and insert the loop block in the hole.
+ MachineFunction::iterator MFI = MBB;
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineFunction *MF = MBB->getParent();
+ MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(MFI, LoopMBB);
+ MF->insert(MFI, DoneMBB);
+ // Move MI and following instructions to DoneMBB.
+ DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
+ DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
+ // Connect the CFG again.
+ MBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ LoopMBB->addSuccessor(DoneMBB);
+ // Build the loop block.
+ unsigned ValReg = MRI.createVirtualRegister(ValueRC);
+ unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
+ BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
+ .addReg(Val0Reg).addMBB(MBB)
+ .addReg(DestReg).addMBB(LoopMBB);
+ // This is one of the min/max operations. We need a CMPrr followed by a
+ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
+ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+ .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
+ BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
+ .addReg(ValReg).addReg(Rs2Reg);
+ if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
+ MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
+ unsigned TmpReg = UpdReg;
+ UpdReg = MRI.createVirtualRegister(ValueRC);
+ BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
+ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
+ .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
+ BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
+ .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
+ MI->eraseFromParent();
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
Index: lib/Target/Sparc/SparcISelLowering.h
===================================================================
--- lib/Target/Sparc/SparcISelLowering.h
+++ lib/Target/Sparc/SparcISelLowering.h
@@ -165,6 +165,13 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const;
+ MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned BROpcode) const;
+ MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned CondCode = 0) const;
} // end namespace llvm