contrib/llvm/patches/patch-r262261-llvm-r199975-sparc.diff

   1 Pull in r199975 from upstream llvm trunk (by Jakob Stoklund Olesen):
   2
   3   Implement atomicrmw operations in 32 and 64 bits for SPARCv9.
   4
   5   These all use the compare-and-swap CASA/CASXA instructions.
   6
   7 Introduced here: http://svnweb.freebsd.org/changeset/base/262261
   8
   9 Index: test/CodeGen/SPARC/atomics.ll
  10 ===================================================================
  11 --- test/CodeGen/SPARC/atomics.ll
  12 +++ test/CodeGen/SPARC/atomics.ll
  13 @@ -1,4 +1,4 @@
  14 -; RUN: llc < %s -march=sparcv9 | FileCheck %s
  15 +; RUN: llc < %s -march=sparcv9 -verify-machineinstrs | FileCheck %s
  16
  17  ; CHECK-LABEL: test_atomic_i32
  18  ; CHECK:       ld [%o0]
  19 @@ -61,3 +61,84 @@ entry:
  20    %b = atomicrmw xchg i32* %ptr, i32 42 monotonic
  21    ret i32 %b
  22  }
  23 +
  24 +; CHECK-LABEL: test_load_add_32
  25 +; CHECK: membar
  26 +; CHECK: add
  27 +; CHECK: cas [%o0]
  28 +; CHECK: membar
  29 +define zeroext i32 @test_load_add_32(i32* %p, i32 zeroext %v) {
  30 +entry:
  31 +  %0 = atomicrmw add i32* %p, i32 %v seq_cst
  32 +  ret i32 %0
  33 +}
  34 +
  35 +; CHECK-LABEL: test_load_sub_64
  36 +; CHECK: membar
  37 +; CHECK: sub
  38 +; CHECK: casx [%o0]
  39 +; CHECK: membar
  40 +define zeroext i64 @test_load_sub_64(i64* %p, i64 zeroext %v) {
  41 +entry:
  42 +  %0 = atomicrmw sub i64* %p, i64 %v seq_cst
  43 +  ret i64 %0
  44 +}
  45 +
  46 +; CHECK-LABEL: test_load_xor_32
  47 +; CHECK: membar
  48 +; CHECK: xor
  49 +; CHECK: cas [%o0]
  50 +; CHECK: membar
  51 +define zeroext i32 @test_load_xor_32(i32* %p, i32 zeroext %v) {
  52 +entry:
  53 +  %0 = atomicrmw xor i32* %p, i32 %v seq_cst
  54 +  ret i32 %0
  55 +}
  56 +
  57 +; CHECK-LABEL: test_load_and_32
  58 +; CHECK: membar
  59 +; CHECK: and
  60 +; CHECK-NOT: xor
  61 +; CHECK: cas [%o0]
  62 +; CHECK: membar
  63 +define zeroext i32 @test_load_and_32(i32* %p, i32 zeroext %v) {
  64 +entry:
  65 +  %0 = atomicrmw and i32* %p, i32 %v seq_cst
  66 +  ret i32 %0
  67 +}
  68 +
  69 +; CHECK-LABEL: test_load_nand_32
  70 +; CHECK: membar
  71 +; CHECK: and
  72 +; CHECK: xor
  73 +; CHECK: cas [%o0]
  74 +; CHECK: membar
  75 +define zeroext i32 @test_load_nand_32(i32* %p, i32 zeroext %v) {
  76 +entry:
  77 +  %0 = atomicrmw nand i32* %p, i32 %v seq_cst
  78 +  ret i32 %0
  79 +}
  80 +
  81 +; CHECK-LABEL: test_load_max_64
  82 +; CHECK: membar
  83 +; CHECK: cmp
  84 +; CHECK: movg %xcc
  85 +; CHECK: casx [%o0]
  86 +; CHECK: membar
  87 +define zeroext i64 @test_load_max_64(i64* %p, i64 zeroext %v) {
  88 +entry:
  89 +  %0 = atomicrmw max i64* %p, i64 %v seq_cst
  90 +  ret i64 %0
  91 +}
  92 +
  93 +; CHECK-LABEL: test_load_umin_32
  94 +; CHECK: membar
  95 +; CHECK: cmp
  96 +; CHECK: movleu %icc
  97 +; CHECK: cas [%o0]
  98 +; CHECK: membar
  99 +define zeroext i32 @test_load_umin_32(i32* %p, i32 zeroext %v) {
 100 +entry:
 101 +  %0 = atomicrmw umin i32* %p, i32 %v seq_cst
 102 +  ret i32 %0
 103 +}
 104 Index: lib/Target/Sparc/SparcInstr64Bit.td
 105 ===================================================================
 106 --- lib/Target/Sparc/SparcInstr64Bit.td
 107 +++ lib/Target/Sparc/SparcInstr64Bit.td
 108 @@ -438,6 +438,31 @@ def : Pat<(atomic_store ADDRri:$dst, i64:$val), (S
 109
 110  } // Predicates = [Is64Bit]
 111
 112 +let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1,
 113 +    Defs = [ICC] in
 114 +multiclass AtomicRMW<SDPatternOperator op32, SDPatternOperator op64> {
 115 +
 116 +  def _32 : Pseudo<(outs IntRegs:$rd),
 117 +                   (ins ptr_rc:$addr, IntRegs:$rs2), "",
 118 +                   [(set i32:$rd, (op32 iPTR:$addr, i32:$rs2))]>;
 119 +
 120 +  let Predicates = [Is64Bit] in
 121 +  def _64 : Pseudo<(outs I64Regs:$rd),
 122 +                   (ins ptr_rc:$addr, I64Regs:$rs2), "",
 123 +                   [(set i64:$rd, (op64 iPTR:$addr, i64:$rs2))]>;
 124 +}
 125 +
 126 +defm ATOMIC_LOAD_ADD  : AtomicRMW<atomic_load_add_32,  atomic_load_add_64>;
 127 +defm ATOMIC_LOAD_SUB  : AtomicRMW<atomic_load_sub_32,  atomic_load_sub_64>;
 128 +defm ATOMIC_LOAD_AND  : AtomicRMW<atomic_load_and_32,  atomic_load_and_64>;
 129 +defm ATOMIC_LOAD_OR   : AtomicRMW<atomic_load_or_32,   atomic_load_or_64>;
 130 +defm ATOMIC_LOAD_XOR  : AtomicRMW<atomic_load_xor_32,  atomic_load_xor_64>;
 131 +defm ATOMIC_LOAD_NAND : AtomicRMW<atomic_load_nand_32, atomic_load_nand_64>;
 132 +defm ATOMIC_LOAD_MIN  : AtomicRMW<atomic_load_min_32,  atomic_load_min_64>;
 133 +defm ATOMIC_LOAD_MAX  : AtomicRMW<atomic_load_max_32,  atomic_load_max_64>;
 134 +defm ATOMIC_LOAD_UMIN : AtomicRMW<atomic_load_umin_32, atomic_load_umin_64>;
 135 +defm ATOMIC_LOAD_UMAX : AtomicRMW<atomic_load_umax_32, atomic_load_umax_64>;
 136 +
 137  // Global addresses, constant pool entries
 138  let Predicates = [Is64Bit] in {
 139
 140 Index: lib/Target/Sparc/SparcISelLowering.cpp
 141 ===================================================================
 142 --- lib/Target/Sparc/SparcISelLowering.cpp
 143 +++ lib/Target/Sparc/SparcISelLowering.cpp
 144 @@ -2831,11 +2831,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) cons
 145  MachineBasicBlock *
 146  SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
 147                                                   MachineBasicBlock *BB) const {
 148 -  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
 149 -  unsigned BROpcode;
 150 -  unsigned CC;
 151 -  DebugLoc dl = MI->getDebugLoc();
 152 -  // Figure out the conditional branch opcode to use for this select_cc.
 153    switch (MI->getOpcode()) {
 154    default: llvm_unreachable("Unknown SELECT_CC!");
 155    case SP::SELECT_CC_Int_ICC:
 156 @@ -2842,17 +2837,64 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
 157    case SP::SELECT_CC_FP_ICC:
 158    case SP::SELECT_CC_DFP_ICC:
 159    case SP::SELECT_CC_QFP_ICC:
 160 -    BROpcode = SP::BCOND;
 161 -    break;
 162 +    return expandSelectCC(MI, BB, SP::BCOND);
 163    case SP::SELECT_CC_Int_FCC:
 164    case SP::SELECT_CC_FP_FCC:
 165    case SP::SELECT_CC_DFP_FCC:
 166    case SP::SELECT_CC_QFP_FCC:
 167 -    BROpcode = SP::FBCOND;
 168 -    break;
 169 +    return expandSelectCC(MI, BB, SP::FBCOND);
 170 +
 171 +  case SP::ATOMIC_LOAD_ADD_32:
 172 +    return expandAtomicRMW(MI, BB, SP::ADDrr);
 173 +  case SP::ATOMIC_LOAD_ADD_64:
 174 +    return expandAtomicRMW(MI, BB, SP::ADDXrr);
 175 +  case SP::ATOMIC_LOAD_SUB_32:
 176 +    return expandAtomicRMW(MI, BB, SP::SUBrr);
 177 +  case SP::ATOMIC_LOAD_SUB_64:
 178 +    return expandAtomicRMW(MI, BB, SP::SUBXrr);
 179 +  case SP::ATOMIC_LOAD_AND_32:
 180 +    return expandAtomicRMW(MI, BB, SP::ANDrr);
 181 +  case SP::ATOMIC_LOAD_AND_64:
 182 +    return expandAtomicRMW(MI, BB, SP::ANDXrr);
 183 +  case SP::ATOMIC_LOAD_OR_32:
 184 +    return expandAtomicRMW(MI, BB, SP::ORrr);
 185 +  case SP::ATOMIC_LOAD_OR_64:
 186 +    return expandAtomicRMW(MI, BB, SP::ORXrr);
 187 +  case SP::ATOMIC_LOAD_XOR_32:
 188 +    return expandAtomicRMW(MI, BB, SP::XORrr);
 189 +  case SP::ATOMIC_LOAD_XOR_64:
 190 +    return expandAtomicRMW(MI, BB, SP::XORXrr);
 191 +  case SP::ATOMIC_LOAD_NAND_32:
 192 +    return expandAtomicRMW(MI, BB, SP::ANDrr);
 193 +  case SP::ATOMIC_LOAD_NAND_64:
 194 +    return expandAtomicRMW(MI, BB, SP::ANDXrr);
 195 +
 196 +  case SP::ATOMIC_LOAD_MAX_32:
 197 +    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_G);
 198 +  case SP::ATOMIC_LOAD_MAX_64:
 199 +    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_G);
 200 +  case SP::ATOMIC_LOAD_MIN_32:
 201 +    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LE);
 202 +  case SP::ATOMIC_LOAD_MIN_64:
 203 +    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LE);
 204 +  case SP::ATOMIC_LOAD_UMAX_32:
 205 +    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_GU);
 206 +  case SP::ATOMIC_LOAD_UMAX_64:
 207 +    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_GU);
 208 +  case SP::ATOMIC_LOAD_UMIN_32:
 209 +    return expandAtomicRMW(MI, BB, SP::MOVICCrr, SPCC::ICC_LEU);
 210 +  case SP::ATOMIC_LOAD_UMIN_64:
 211 +    return expandAtomicRMW(MI, BB, SP::MOVXCCrr, SPCC::ICC_LEU);
 212    }
 213 +}
 214
 215 -  CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 216 +MachineBasicBlock*
 217 +SparcTargetLowering::expandSelectCC(MachineInstr *MI,
 218 +                                    MachineBasicBlock *BB,
 219 +                                    unsigned BROpcode) const {
 220 +  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
 221 +  DebugLoc dl = MI->getDebugLoc();
 222 +  unsigned CC = (SPCC::CondCodes)MI->getOperand(3).getImm();
 223
 224    // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
 225    // control-flow pattern.  The incoming instruction knows the destination vreg
 226 @@ -2906,6 +2948,100 @@ SparcTargetLowering::EmitInstrWithCustomInserter(M
 227    return BB;
 228  }
 229
 230 +MachineBasicBlock*
 231 +SparcTargetLowering::expandAtomicRMW(MachineInstr *MI,
 232 +                                     MachineBasicBlock *MBB,
 233 +                                     unsigned Opcode,
 234 +                                     unsigned CondCode) const {
 235 +  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
 236 +  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
 237 +  DebugLoc DL = MI->getDebugLoc();
 238 +
 239 +  // MI is an atomic read-modify-write instruction of the form:
 240 +  //
 241 +  //   rd = atomicrmw<op> addr, rs2
 242 +  //
 243 +  // All three operands are registers.
 244 +  unsigned DestReg = MI->getOperand(0).getReg();
 245 +  unsigned AddrReg = MI->getOperand(1).getReg();
 246 +  unsigned Rs2Reg  = MI->getOperand(2).getReg();
 247 +
 248 +  // SelectionDAG has already inserted memory barriers before and after MI, so
  249 +  // we simply have to implement the operation in terms of compare-and-swap.
 250 +  //
 251 +  //   %val0 = load %addr
 252 +  // loop:
 253 +  //   %val = phi %val0, %dest
 254 +  //   %upd = op %val, %rs2
 255 +  //   %dest = cas %addr, %upd, %val
 256 +  //   cmp %val, %dest
 257 +  //   bne loop
 258 +  // done:
 259 +  //
 260 +  bool is64Bit = SP::I64RegsRegClass.hasSubClassEq(MRI.getRegClass(DestReg));
 261 +  const TargetRegisterClass *ValueRC =
 262 +    is64Bit ? &SP::I64RegsRegClass : &SP::IntRegsRegClass;
 263 +  unsigned Val0Reg = MRI.createVirtualRegister(ValueRC);
 264 +
 265 +  BuildMI(*MBB, MI, DL, TII.get(is64Bit ? SP::LDXri : SP::LDri), Val0Reg)
 266 +    .addReg(AddrReg).addImm(0);
 267 +
 268 +  // Split the basic block MBB before MI and insert the loop block in the hole.
 269 +  MachineFunction::iterator MFI = MBB;
 270 +  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
 271 +  MachineFunction *MF = MBB->getParent();
 272 +  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
 273 +  MachineBasicBlock *DoneMBB = MF->CreateMachineBasicBlock(LLVM_BB);
 274 +  ++MFI;
 275 +  MF->insert(MFI, LoopMBB);
 276 +  MF->insert(MFI, DoneMBB);
 277 +
 278 +  // Move MI and following instructions to DoneMBB.
 279 +  DoneMBB->splice(DoneMBB->begin(), MBB, MI, MBB->end());
 280 +  DoneMBB->transferSuccessorsAndUpdatePHIs(MBB);
 281 +
 282 +  // Connect the CFG again.
 283 +  MBB->addSuccessor(LoopMBB);
 284 +  LoopMBB->addSuccessor(LoopMBB);
 285 +  LoopMBB->addSuccessor(DoneMBB);
 286 +
 287 +  // Build the loop block.
 288 +  unsigned ValReg = MRI.createVirtualRegister(ValueRC);
 289 +  unsigned UpdReg = MRI.createVirtualRegister(ValueRC);
 290 +
 291 +  BuildMI(LoopMBB, DL, TII.get(SP::PHI), ValReg)
 292 +    .addReg(Val0Reg).addMBB(MBB)
 293 +    .addReg(DestReg).addMBB(LoopMBB);
 294 +
 295 +  if (CondCode) {
 296 +    // This is one of the min/max operations. We need a CMPrr followed by a
 297 +    // MOVXCC/MOVICC.
 298 +    BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(Rs2Reg);
 299 +    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
 300 +      .addReg(ValReg).addReg(Rs2Reg).addImm(CondCode);
 301 +  } else {
 302 +    BuildMI(LoopMBB, DL, TII.get(Opcode), UpdReg)
 303 +      .addReg(ValReg).addReg(Rs2Reg);
 304 +  }
 305 +
 306 +  if (MI->getOpcode() == SP::ATOMIC_LOAD_NAND_32 ||
 307 +      MI->getOpcode() == SP::ATOMIC_LOAD_NAND_64) {
 308 +    unsigned TmpReg = UpdReg;
 309 +    UpdReg = MRI.createVirtualRegister(ValueRC);
 310 +    BuildMI(LoopMBB, DL, TII.get(SP::XORri), UpdReg).addReg(TmpReg).addImm(-1);
 311 +  }
 312 +
 313 +  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::CASXrr : SP::CASrr), DestReg)
 314 +    .addReg(AddrReg).addReg(UpdReg).addReg(ValReg)
 315 +    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
 316 +  BuildMI(LoopMBB, DL, TII.get(SP::CMPrr)).addReg(ValReg).addReg(DestReg);
 317 +  BuildMI(LoopMBB, DL, TII.get(is64Bit ? SP::BPXCC : SP::BCOND))
 318 +    .addMBB(LoopMBB).addImm(SPCC::ICC_NE);
 319 +
 320 +  MI->eraseFromParent();
 321 +  return DoneMBB;
 322 +}
 323 +
 324  //===----------------------------------------------------------------------===//
 325  //                         Sparc Inline Assembly Support
 326  //===----------------------------------------------------------------------===//
 327 Index: lib/Target/Sparc/SparcISelLowering.h
 328 ===================================================================
 329 --- lib/Target/Sparc/SparcISelLowering.h
 330 +++ lib/Target/Sparc/SparcISelLowering.h
 331 @@ -165,6 +165,13 @@ namespace llvm {
 332      virtual void ReplaceNodeResults(SDNode *N,
 333                                      SmallVectorImpl<SDValue>& Results,
 334                                      SelectionDAG &DAG) const;
 335 +
 336 +    MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
 337 +                                      unsigned BROpcode) const;
 338 +    MachineBasicBlock *expandAtomicRMW(MachineInstr *MI,
 339 +                                       MachineBasicBlock *BB,
 340 +                                       unsigned Opcode,
 341 +                                       unsigned CondCode = 0) const;
 342    };
 343  } // end namespace llvm
 344