1 //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// \brief Memory legalizer - implements memory model. More information can be
13 /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
15 //===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include <cassert>
#include <cstdint>
#include <list>

using namespace llvm;
using namespace llvm::AMDGPU;
45 #define DEBUG_TYPE "si-memory-legalizer"
46 #define PASS_NAME "SI Memory Legalizer"
50 class SIMemOpInfo final {
52 SyncScope::ID SSID = SyncScope::System;
53 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
54 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
55 bool IsNonTemporal = false;
57 SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering)
58 : SSID(SSID), Ordering(Ordering) {}
60 SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering,
61 AtomicOrdering FailureOrdering, bool IsNonTemporal = false)
62 : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering),
63 IsNonTemporal(IsNonTemporal) {}
65 /// \returns Info constructed from \p MI, which has at least machine memory
67 static Optional<SIMemOpInfo> constructFromMIWithMMO(
68 const MachineBasicBlock::iterator &MI);
71 /// \returns Synchronization scope ID of the machine instruction used to
72 /// create this SIMemOpInfo.
73 SyncScope::ID getSSID() const {
76 /// \returns Ordering constraint of the machine instruction used to
77 /// create this SIMemOpInfo.
78 AtomicOrdering getOrdering() const {
81 /// \returns Failure ordering constraint of the machine instruction used to
82 /// create this SIMemOpInfo.
83 AtomicOrdering getFailureOrdering() const {
84 return FailureOrdering;
86 /// \returns True if memory access of the machine instruction used to
87 /// create this SIMemOpInfo is non-temporal, false otherwise.
88 bool isNonTemporal() const {
92 /// \returns True if ordering constraint of the machine instruction used to
93 /// create this SIMemOpInfo is unordered or higher, false otherwise.
94 bool isAtomic() const {
95 return Ordering != AtomicOrdering::NotAtomic;
98 /// \returns Load info if \p MI is a load operation, "None" otherwise.
99 static Optional<SIMemOpInfo> getLoadInfo(
100 const MachineBasicBlock::iterator &MI);
101 /// \returns Store info if \p MI is a store operation, "None" otherwise.
102 static Optional<SIMemOpInfo> getStoreInfo(
103 const MachineBasicBlock::iterator &MI);
104 /// \returns Atomic fence info if \p MI is an atomic fence operation,
105 /// "None" otherwise.
106 static Optional<SIMemOpInfo> getAtomicFenceInfo(
107 const MachineBasicBlock::iterator &MI);
108 /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation,
109 /// "None" otherwise.
110 static Optional<SIMemOpInfo> getAtomicCmpxchgInfo(
111 const MachineBasicBlock::iterator &MI);
112 /// \returns Atomic rmw info if \p MI is an atomic rmw operation,
113 /// "None" otherwise.
114 static Optional<SIMemOpInfo> getAtomicRmwInfo(
115 const MachineBasicBlock::iterator &MI);
117 /// \brief Reports unknown synchronization scope used in \p MI to LLVM
119 static void reportUnknownSyncScope(
120 const MachineBasicBlock::iterator &MI);
123 class SIMemoryLegalizer final : public MachineFunctionPass {
125 /// \brief Machine module info.
126 const AMDGPUMachineModuleInfo *MMI = nullptr;
128 /// \brief Instruction info.
129 const SIInstrInfo *TII = nullptr;
131 /// \brief Immediate for "vmcnt(0)".
132 unsigned Vmcnt0Immediate = 0;
134 /// \brief Opcode for cache invalidation instruction (L1).
135 unsigned Wbinvl1Opcode = 0;
137 /// \brief List of atomic pseudo instructions.
138 std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;
140 /// \brief Sets named bit (BitName) to "true" if present in \p MI. Returns
141 /// true if \p MI is modified, false otherwise.
142 template <uint16_t BitName>
143 bool enableNamedBit(const MachineBasicBlock::iterator &MI) const {
144 int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName);
148 MachineOperand &Bit = MI->getOperand(BitIdx);
149 if (Bit.getImm() != 0)
156 /// \brief Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
157 /// is modified, false otherwise.
158 bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
159 return enableNamedBit<AMDGPU::OpName::glc>(MI);
162 /// \brief Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
163 /// is modified, false otherwise.
164 bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
165 return enableNamedBit<AMDGPU::OpName::slc>(MI);
168 /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI.
169 /// Always returns true.
170 bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
171 bool Before = true) const;
172 /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI.
173 /// Always returns true.
174 bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
175 bool Before = true) const;
177 /// \brief Removes all processed atomic pseudo instructions from the current
178 /// function. Returns true if current function is modified, false otherwise.
179 bool removeAtomicPseudoMIs();
181 /// \brief Expands load operation \p MI. Returns true if instructions are
182 /// added/deleted or \p MI is modified, false otherwise.
183 bool expandLoad(const SIMemOpInfo &MOI,
184 MachineBasicBlock::iterator &MI);
185 /// \brief Expands store operation \p MI. Returns true if instructions are
186 /// added/deleted or \p MI is modified, false otherwise.
187 bool expandStore(const SIMemOpInfo &MOI,
188 MachineBasicBlock::iterator &MI);
189 /// \brief Expands atomic fence operation \p MI. Returns true if
190 /// instructions are added/deleted or \p MI is modified, false otherwise.
191 bool expandAtomicFence(const SIMemOpInfo &MOI,
192 MachineBasicBlock::iterator &MI);
193 /// \brief Expands atomic cmpxchg operation \p MI. Returns true if
194 /// instructions are added/deleted or \p MI is modified, false otherwise.
195 bool expandAtomicCmpxchg(const SIMemOpInfo &MOI,
196 MachineBasicBlock::iterator &MI);
197 /// \brief Expands atomic rmw operation \p MI. Returns true if
198 /// instructions are added/deleted or \p MI is modified, false otherwise.
199 bool expandAtomicRmw(const SIMemOpInfo &MOI,
200 MachineBasicBlock::iterator &MI);
205 SIMemoryLegalizer() : MachineFunctionPass(ID) {}
207 void getAnalysisUsage(AnalysisUsage &AU) const override {
208 AU.setPreservesCFG();
209 MachineFunctionPass::getAnalysisUsage(AU);
212 StringRef getPassName() const override {
216 bool runOnMachineFunction(MachineFunction &MF) override;
219 } // end namespace anonymous
222 Optional<SIMemOpInfo> SIMemOpInfo::constructFromMIWithMMO(
223 const MachineBasicBlock::iterator &MI) {
224 assert(MI->getNumMemOperands() > 0);
226 const MachineFunction *MF = MI->getParent()->getParent();
227 const AMDGPUMachineModuleInfo *MMI =
228 &MF->getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
230 SyncScope::ID SSID = SyncScope::SingleThread;
231 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
232 AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
233 bool IsNonTemporal = true;
235 // Validator should check whether or not MMOs cover the entire set of
236 // locations accessed by the memory instruction.
237 for (const auto &MMO : MI->memoperands()) {
238 const auto &IsSyncScopeInclusion =
239 MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
240 if (!IsSyncScopeInclusion) {
241 reportUnknownSyncScope(MI);
245 SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
247 isStrongerThan(Ordering, MMO->getOrdering()) ?
248 Ordering : MMO->getOrdering();
250 isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
251 FailureOrdering : MMO->getFailureOrdering();
253 if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal))
254 IsNonTemporal = false;
257 return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal);
261 Optional<SIMemOpInfo> SIMemOpInfo::getLoadInfo(
262 const MachineBasicBlock::iterator &MI) {
263 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
265 if (!(MI->mayLoad() && !MI->mayStore()))
268 // Be conservative if there are no memory operands.
269 if (MI->getNumMemOperands() == 0)
270 return SIMemOpInfo(SyncScope::System,
271 AtomicOrdering::SequentiallyConsistent);
273 return SIMemOpInfo::constructFromMIWithMMO(MI);
277 Optional<SIMemOpInfo> SIMemOpInfo::getStoreInfo(
278 const MachineBasicBlock::iterator &MI) {
279 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
281 if (!(!MI->mayLoad() && MI->mayStore()))
284 // Be conservative if there are no memory operands.
285 if (MI->getNumMemOperands() == 0)
286 return SIMemOpInfo(SyncScope::System,
287 AtomicOrdering::SequentiallyConsistent);
289 return SIMemOpInfo::constructFromMIWithMMO(MI);
293 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicFenceInfo(
294 const MachineBasicBlock::iterator &MI) {
295 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
297 if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
301 static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
302 AtomicOrdering Ordering =
303 static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
304 return SIMemOpInfo(SSID, Ordering);
308 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicCmpxchgInfo(
309 const MachineBasicBlock::iterator &MI) {
310 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
312 if (!(MI->mayLoad() && MI->mayStore()))
315 // Be conservative if there are no memory operands.
316 if (MI->getNumMemOperands() == 0)
317 return SIMemOpInfo(SyncScope::System,
318 AtomicOrdering::SequentiallyConsistent,
319 AtomicOrdering::SequentiallyConsistent);
321 return SIMemOpInfo::constructFromMIWithMMO(MI);
325 Optional<SIMemOpInfo> SIMemOpInfo::getAtomicRmwInfo(
326 const MachineBasicBlock::iterator &MI) {
327 assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
329 if (!(MI->mayLoad() && MI->mayStore()))
332 // Be conservative if there are no memory operands.
333 if (MI->getNumMemOperands() == 0)
334 return SIMemOpInfo(SyncScope::System,
335 AtomicOrdering::SequentiallyConsistent);
337 return SIMemOpInfo::constructFromMIWithMMO(MI);
341 void SIMemOpInfo::reportUnknownSyncScope(
342 const MachineBasicBlock::iterator &MI) {
343 DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(),
344 "Unsupported synchronization scope");
345 LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext();
349 bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI,
351 MachineBasicBlock &MBB = *MI->getParent();
352 DebugLoc DL = MI->getDebugLoc();
357 BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode));
365 bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI,
367 MachineBasicBlock &MBB = *MI->getParent();
368 DebugLoc DL = MI->getDebugLoc();
373 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate);
381 bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
382 if (AtomicPseudoMIs.empty())
385 for (auto &MI : AtomicPseudoMIs)
386 MI->eraseFromParent();
388 AtomicPseudoMIs.clear();
392 bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
393 MachineBasicBlock::iterator &MI) {
394 assert(MI->mayLoad() && !MI->mayStore());
396 bool Changed = false;
398 if (MOI.isAtomic()) {
399 if (MOI.getSSID() == SyncScope::System ||
400 MOI.getSSID() == MMI->getAgentSSID()) {
401 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
402 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
403 Changed |= enableGLCBit(MI);
405 if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
406 Changed |= insertWaitcntVmcnt0(MI);
408 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
409 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
410 Changed |= insertWaitcntVmcnt0(MI, false);
411 Changed |= insertBufferWbinvl1Vol(MI, false);
417 if (MOI.getSSID() == SyncScope::SingleThread ||
418 MOI.getSSID() == MMI->getWorkgroupSSID() ||
419 MOI.getSSID() == MMI->getWavefrontSSID()) {
423 llvm_unreachable("Unsupported synchronization scope");
426 // Atomic instructions do not have the nontemporal attribute.
427 if (MOI.isNonTemporal()) {
428 Changed |= enableGLCBit(MI);
429 Changed |= enableSLCBit(MI);
436 bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
437 MachineBasicBlock::iterator &MI) {
438 assert(!MI->mayLoad() && MI->mayStore());
440 bool Changed = false;
442 if (MOI.isAtomic()) {
443 if (MOI.getSSID() == SyncScope::System ||
444 MOI.getSSID() == MMI->getAgentSSID()) {
445 if (MOI.getOrdering() == AtomicOrdering::Release ||
446 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
447 Changed |= insertWaitcntVmcnt0(MI);
452 if (MOI.getSSID() == SyncScope::SingleThread ||
453 MOI.getSSID() == MMI->getWorkgroupSSID() ||
454 MOI.getSSID() == MMI->getWavefrontSSID()) {
458 llvm_unreachable("Unsupported synchronization scope");
461 // Atomic instructions do not have the nontemporal attribute.
462 if (MOI.isNonTemporal()) {
463 Changed |= enableGLCBit(MI);
464 Changed |= enableSLCBit(MI);
471 bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
472 MachineBasicBlock::iterator &MI) {
473 assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
475 bool Changed = false;
477 if (MOI.isAtomic()) {
478 if (MOI.getSSID() == SyncScope::System ||
479 MOI.getSSID() == MMI->getAgentSSID()) {
480 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
481 MOI.getOrdering() == AtomicOrdering::Release ||
482 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
483 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
484 Changed |= insertWaitcntVmcnt0(MI);
486 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
487 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
488 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
489 Changed |= insertBufferWbinvl1Vol(MI);
491 AtomicPseudoMIs.push_back(MI);
495 if (MOI.getSSID() == SyncScope::SingleThread ||
496 MOI.getSSID() == MMI->getWorkgroupSSID() ||
497 MOI.getSSID() == MMI->getWavefrontSSID()) {
498 AtomicPseudoMIs.push_back(MI);
502 SIMemOpInfo::reportUnknownSyncScope(MI);
508 bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI,
509 MachineBasicBlock::iterator &MI) {
510 assert(MI->mayLoad() && MI->mayStore());
512 bool Changed = false;
514 if (MOI.isAtomic()) {
515 if (MOI.getSSID() == SyncScope::System ||
516 MOI.getSSID() == MMI->getAgentSSID()) {
517 if (MOI.getOrdering() == AtomicOrdering::Release ||
518 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
519 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
520 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
521 Changed |= insertWaitcntVmcnt0(MI);
523 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
524 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
525 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
526 MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
527 MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
528 Changed |= insertWaitcntVmcnt0(MI, false);
529 Changed |= insertBufferWbinvl1Vol(MI, false);
535 if (MOI.getSSID() == SyncScope::SingleThread ||
536 MOI.getSSID() == MMI->getWorkgroupSSID() ||
537 MOI.getSSID() == MMI->getWavefrontSSID()) {
538 Changed |= enableGLCBit(MI);
542 llvm_unreachable("Unsupported synchronization scope");
548 bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI,
549 MachineBasicBlock::iterator &MI) {
550 assert(MI->mayLoad() && MI->mayStore());
552 bool Changed = false;
554 if (MOI.isAtomic()) {
555 if (MOI.getSSID() == SyncScope::System ||
556 MOI.getSSID() == MMI->getAgentSSID()) {
557 if (MOI.getOrdering() == AtomicOrdering::Release ||
558 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
559 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
560 Changed |= insertWaitcntVmcnt0(MI);
562 if (MOI.getOrdering() == AtomicOrdering::Acquire ||
563 MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
564 MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
565 Changed |= insertWaitcntVmcnt0(MI, false);
566 Changed |= insertBufferWbinvl1Vol(MI, false);
572 if (MOI.getSSID() == SyncScope::SingleThread ||
573 MOI.getSSID() == MMI->getWorkgroupSSID() ||
574 MOI.getSSID() == MMI->getWavefrontSSID()) {
575 Changed |= enableGLCBit(MI);
579 llvm_unreachable("Unsupported synchronization scope");
585 bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
586 bool Changed = false;
587 const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
588 const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
590 MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
591 TII = ST.getInstrInfo();
594 AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV));
595 Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ?
596 AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL;
598 for (auto &MBB : MF) {
599 for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {
600 if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
603 if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI))
604 Changed |= expandLoad(MOI.getValue(), MI);
605 else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI))
606 Changed |= expandStore(MOI.getValue(), MI);
607 else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI))
608 Changed |= expandAtomicFence(MOI.getValue(), MI);
609 else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI))
610 Changed |= expandAtomicCmpxchg(MOI.getValue(), MI);
611 else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI))
612 Changed |= expandAtomicRmw(MOI.getValue(), MI);
616 Changed |= removeAtomicPseudoMIs();
620 INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
622 char SIMemoryLegalizer::ID = 0;
623 char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
625 FunctionPass *llvm::createSIMemoryLegalizerPass() {
626 return new SIMemoryLegalizer();