1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative manner.
12 /// This pass merges consecutive CFAlus where applicable.
13 /// It needs to be called after IfCvt for best results.
14 //===----------------------------------------------------------------------===//
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/MachineInstrBuilder.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
30 #define DEBUG_TYPE "r600mergeclause"
// Switches on MI's opcode; CF_ALU_PUSH_BEFORE is one of the handled cases.
// NOTE(review): judging by the name and the bool return type this presumably
// reports whether MI is a CF_ALU clause marker -- the remaining cases and the
// return statements are not visible in this excerpt.
34 static bool isCFAlu(const MachineInstr &MI) {
35 switch (MI.getOpcode()) {
37 case AMDGPU::CF_ALU_PUSH_BEFORE:
// Machine function pass that merges consecutive CF_ALU clause markers
// (mergeIfPossible) and folds "disabled" markers into the preceding clause
// (cleanPotentialDisabledCFAlu).
44 class R600ClauseMergePass : public MachineFunctionPass {
// R600 instruction info; assigned from the subtarget in runOnMachineFunction
// and used by the helpers below to look up operand indices.
47 const R600InstrInfo *TII;
// Helpers reading the COUNT and Enabled operands of a CF_ALU marker.
49 unsigned getCFAluSize(const MachineInstr &MI) const;
50 bool isCFAluEnabled(const MachineInstr &MI) const;
52 /// IfCvt pass can generate "disabled" ALU clause markers that need to be
53 /// removed and their content attached to the previous ALU clause.
54 /// This function parses instructions after CFAlu until it finds a disabled
55 /// CFAlu and merges its content, or until it finds an enabled CFAlu.
56 void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
58 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
60 bool mergeIfPossible(MachineInstr &RootCFAlu,
61 const MachineInstr &LatrCFAlu) const;
// Constructs the pass, handing the pass ID to the MachineFunctionPass base.
66 R600ClauseMergePass() : MachineFunctionPass(ID) { }
// Pass entry point; see the out-of-line definition.
68 bool runOnMachineFunction(MachineFunction &MF) override;
// Human-readable name of the pass.
70 StringRef getPassName() const override;
73 } // end anonymous namespace
// Pass registration: DEBUG_TYPE is supplied as the pass argument and
// "R600 Clause Merge" as its descriptive name. No dependencies are declared
// between the BEGIN and END macros.
75 INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
76 "R600 Clause Merge", false, false)
77 INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
78 "R600 Clause Merge", false, false)
// Definition of the pass ID that the constructor passes to MachineFunctionPass.
80 char R600ClauseMergePass::ID = 0;
// Exposes the pass ID in the llvm namespace as a reference to the member above.
82 char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
// Accesses the COUNT operand of MI, whose index is looked up through
// TII->getOperandIdx for MI's own opcode.
// NOTE(review): the enclosing return expression is not visible here;
// presumably the operand's immediate is returned as the clause size.
84 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
87 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
// Accesses the Enabled operand of MI, whose index is looked up through
// TII->getOperandIdx for MI's own opcode.
// NOTE(review): the enclosing return expression is not visible here;
// presumably the operand's immediate is returned as the enabled flag.
91 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
94 .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
// Scans forward from CFAlu inside its basic block and folds following
// disabled CF_ALU markers into it: the marker's COUNT is added to CFAlu's
// COUNT and the marker is erased.
// NOTE(review): the loop structure and the action taken on an enabled marker
// are not visible in this excerpt; presumably the scan stops there.
98 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
99 MachineInstr &CFAlu) const {
// Index of the COUNT operand, looked up for the plain CF_ALU opcode.
100 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
101 MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
// Walk over instructions that are not CF_ALU markers.
104 while (I != E && !isCFAlu(*I))
// Take the marker that was found and advance past it before it may be erased.
108 MachineInstr &MI = *I++;
109 if (isCFAluEnabled(MI))
// Disabled marker: add its instruction count to CFAlu, then delete it.
111 CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
112 MI.eraseFromParent();
/// Tries to fold the clause headed by LatrCFAlu into the one headed by
/// RootCFAlu.
///
/// Visible checks: the summed COUNT of both markers is compared against
/// TII->getMaxAlusPerClause() ("Excess inst counts"), RootCFAlu's opcode is
/// tested against CF_ALU_PUSH_BEFORE, and for each KCache bank a conflict is
/// reported when both markers have a non-zero mode but differ in bank or
/// address. After the checks, LatrCFAlu's KCache settings (where its mode is
/// non-zero), the combined count and LatrCFAlu's opcode are written into
/// RootCFAlu.
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably each failed check returns false and the end returns true.
116 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
117 const MachineInstr &LatrCFAlu) const {
118 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
// Operand indices below are all looked up for the plain CF_ALU opcode.
119 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
120 unsigned RootInstCount = getCFAluSize(RootCFAlu),
121 LaterInstCount = getCFAluSize(LatrCFAlu);
122 unsigned CumuledInsts = RootInstCount + LaterInstCount;
// The merged clause must stay below the per-clause ALU limit.
123 if (CumuledInsts >= TII->getMaxAlusPerClause()) {
124 DEBUG(dbgs() << "Excess inst counts\n");
127 if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
129 // Is KCache Bank 0 compatible ?
131 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
133 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
// Conflict only if both markers use KCache 0 and disagree on bank or address.
136 if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
137 RootCFAlu.getOperand(Mode0Idx).getImm() &&
138 (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
139 RootCFAlu.getOperand(KBank0Idx).getImm() ||
140 LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
141 RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
142 DEBUG(dbgs() << "Wrong KC0\n");
145 // Is KCache Bank 1 compatible ?
147 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
149 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
151 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
// Conflict only if both markers use KCache 1 and disagree on bank or address.
152 if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
153 RootCFAlu.getOperand(Mode1Idx).getImm() &&
154 (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
155 RootCFAlu.getOperand(KBank1Idx).getImm() ||
156 LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
157 RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
158 DEBUG(dbgs() << "Wrong KC1\n");
// Carry LatrCFAlu's KCache 0 settings over to RootCFAlu when it has any.
161 if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
162 RootCFAlu.getOperand(Mode0Idx).setImm(
163 LatrCFAlu.getOperand(Mode0Idx).getImm());
164 RootCFAlu.getOperand(KBank0Idx).setImm(
165 LatrCFAlu.getOperand(KBank0Idx).getImm());
166 RootCFAlu.getOperand(KBank0LineIdx)
167 .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
// Same for KCache 1.
169 if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
170 RootCFAlu.getOperand(Mode1Idx).setImm(
171 LatrCFAlu.getOperand(Mode1Idx).getImm());
172 RootCFAlu.getOperand(KBank1Idx).setImm(
173 LatrCFAlu.getOperand(KBank1Idx).getImm());
174 RootCFAlu.getOperand(KBank1LineIdx)
175 .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
// RootCFAlu now accounts for both clauses and takes over LatrCFAlu's opcode.
177 RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
178 RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
/// Pass entry point. Consults skipFunction(), caches the R600 instruction
/// info in TII, then walks every basic block of MF, cleaning up disabled
/// CF_ALU markers and merging each marker into the most recent one
/// (LatestCFAlu) when mergeIfPossible succeeds.
/// NOTE(review): loop headers, the actions of several branches and the
/// return statements are not visible in this excerpt.
182 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
183 if (skipFunction(MF.getFunction()))
186 const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
187 TII = ST.getInstrInfo();
189 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
191 MachineBasicBlock &MBB = *BB;
192 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
// E doubles as the "no marker seen yet" sentinel.
193 MachineBasicBlock::iterator LatestCFAlu = E;
// Advance the iterator before MI is possibly erased below.
195 MachineInstr &MI = *I++;
// Instructions that are neither ALU-like nor CF_ALU markers, or that must end
// a clause, are handled separately (action not visible here).
196 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
197 TII->mustBeLastInClause(MI.getOpcode()))
201 cleanPotentialDisabledCFAlu(MI);
// MI's clause was folded into LatestCFAlu, so the marker itself is removed.
203 if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
204 MI.eraseFromParent();
// NOTE(review): operand 8 is presumably the Enabled operand that
// isCFAluEnabled() looks up by name -- confirm.
206 assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
// Returns the fixed human-readable name of this pass.
214 StringRef R600ClauseMergePass::getPassName() const {
215 return "R600 Merge Clause Markers Pass";
// Factory: returns a newly heap-allocated R600ClauseMergePass as a
// FunctionPass pointer.
218 llvm::FunctionPass *llvm::createR600ClauseMergePass() {
219 return new R600ClauseMergePass();