1 //==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
// This file defines the scheduling machine model for the Cavium ThunderX T8X
// (T88, T81, T83) processors.
// Loosely based on the Cortex-A53 model, which is somewhat similar.
14 //===----------------------------------------------------------------------===//
16 // ===---------------------------------------------------------------------===//
17 // The following definitions describe the simpler per-operand machine model.
18 // This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.
20 // Cavium ThunderX T8X scheduling machine model.
21 def ThunderXT8XModel : SchedMachineModel {
22 let IssueWidth = 2; // 2 micro-ops dispatched per cycle.
23 let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
24 let LoadLatency = 3; // Optimistic load latency.
25 let MispredictPenalty = 8; // Branch mispredict penalty.
26 let PostRAScheduler = 1; // Use PostRA scheduler.
27 let CompleteModel = 1;
// One ProcResource per pipeline. Because T8X is in-order, every pipeline is
// declared with BufferSize = 0; the setting is applied once to the whole
// group instead of being repeated on each def.
let BufferSize = 0 in {
  def THXT8XUnitALU   : ProcResource<2>; // Int ALU
  def THXT8XUnitMAC   : ProcResource<1>; // Int MAC
  def THXT8XUnitDiv   : ProcResource<1>; // Int Division
  def THXT8XUnitLdSt  : ProcResource<1>; // Load/Store
  def THXT8XUnitBr    : ProcResource<1>; // Branch
  def THXT8XUnitFPALU : ProcResource<1>; // FP ALU
  def THXT8XUnitFPMDS : ProcResource<1>; // FP Mul/Div/Sqrt
}
39 //===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types mapping the ProcResources and
// latencies.
43 let SchedModel = ThunderXT8XModel in {
// Integer ALU writes, grouped by result latency. All of them issue on the
// ALU pipes.
let Latency = 1 in {
  def : WriteRes<WriteImm, [THXT8XUnitALU]>;
  def : WriteRes<WriteI,   [THXT8XUnitALU]>;
}
let Latency = 2 in {
  def : WriteRes<WriteISReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIEReg, [THXT8XUnitALU]>;
  def : WriteRes<WriteIS,    [THXT8XUnitALU]>;
  def : WriteRes<WriteExtr,  [THXT8XUnitALU]>;
}
54 def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
56 let ResourceCycles = [1];
59 def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
61 let ResourceCycles = [1];
65 def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
67 let ResourceCycles = [6];
70 def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
72 let ResourceCycles = [8];
// WriteLD, WriteLDIdx and WriteLDHi all use the load/store pipe with a
// three-cycle latency (the same value as the model's LoadLatency).
let Latency = 3 in {
  def : WriteRes<WriteLD,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteLDHi,  [THXT8XUnitLdSt]>;
}
81 def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
83 let ResourceCycles = [3];
86 def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
88 let ResourceCycles = [1];
91 def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
93 let ResourceCycles = [7];
96 def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
98 let ResourceCycles = [8];
101 def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
103 let ResourceCycles = [9];
106 def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
108 let ResourceCycles = [9];
// WriteAdr consumes no processor resource and has zero latency.
let Latency = 0 in
def : WriteRes<WriteAdr, []>;

// Store writes: one-cycle latency on the load/store pipe.
let Latency = 1 in {
  def : WriteRes<WriteST,    [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTP,   [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]>;
  def : WriteRes<WriteSTX,   [THXT8XUnitLdSt]>;
}

// Vector store writes on the load/store pipe; no fields are overridden, so
// the class defaults apply.
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
124 def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
126 let ResourceCycles = [9];
129 def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
131 let ResourceCycles = [10];
// WriteAtomic is marked unsupported in this model.
let Unsupported = 1 in
def : WriteRes<WriteAtomic, []>;

// Generic branch, system, barrier and hint writes all use the branch pipe.
def : WriteRes<WriteBr,      [THXT8XUnitBr]>;
def : WriteRes<WriteBrReg,   [THXT8XUnitBr]>;
def : WriteRes<WriteSys,     [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint,    [THXT8XUnitBr]>;

// Named writes referenced by the InstRW overrides in this file.
def THXT8XWriteBR  : SchedWriteRes<[THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
// Note: the RET write is booked on the ALU pipe, not the branch pipe.
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
// FP and vector writes that share a six-cycle result latency.
let Latency = 6 in {
  // Issued on the FP ALU pipe.
  def : WriteRes<WriteF,     [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCmp,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCvt,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]>;
  def : WriteRes<WriteFImm,  [THXT8XUnitFPALU]>;
  def : WriteRes<WriteV,     [THXT8XUnitFPALU]>;

  // Issued on the FP Mul/Div/Sqrt pipe.
  def : WriteRes<WriteFMul,  [THXT8XUnitFPMDS]>;
}
156 def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
158 let ResourceCycles = [19];
// FP multiply-accumulate: ten-cycle latency on the FP Mul/Div/Sqrt pipe.
let Latency = 10 in
def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]>;
163 def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
165 let ResourceCycles = [9];
168 def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
170 let ResourceCycles = [19];
173 def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
175 let ResourceCycles = [14];
178 def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
180 let ResourceCycles = [28];
183 //===----------------------------------------------------------------------===//
184 // Subtarget-specific SchedRead types.
// Read advances with no ValidWrites list: the advance is not restricted to
// particular producing writes.
// NOTE(review): this group was previously labelled "No forwarding", which
// does not match the non-zero advances below -- confirm the intended values
// against hardware data.
def : ReadAdvance<ReadExtrHi,  1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD,     2>;
// FIXME: This needs more targeted benchmarking.
// Integer ALU operand timing. Each advance applies only when the operand is
// produced by one of the integer writes listed.
//   - Plain ALU operands (ReadI) are not needed for two cycles.
//   - A shiftable or extendable register operand is needed one cycle later
//     when it really is shifted/extended, and two cycles later otherwise.
//     Shifted and extended registers share the same two advances even though
//     they are selected by different predicates.
def : ReadAdvance<ReadI, 2,
                  [WriteImm, WriteI,
                   WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64,
                   WriteIM32, WriteIM64]>;

def THXT8XReadShifted :
  SchedReadAdvance<1,
                   [WriteImm, WriteI,
                    WriteISReg, WriteIEReg, WriteIS,
                    WriteID32, WriteID64,
                    WriteIM32, WriteIM64]>;

def THXT8XReadNotShifted :
  SchedReadAdvance<2,
                   [WriteImm, WriteI,
                    WriteISReg, WriteIEReg, WriteIS,
                    WriteID32, WriteID64,
                    WriteIM32, WriteIM64]>;

// ReadISReg: pick the advance according to RegShiftedPred.
def THXT8XReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,    [THXT8XReadNotShifted]>
]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

// ReadIEReg: pick the advance according to RegExtendedPred.
def THXT8XReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,     [THXT8XReadNotShifted]>
]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;
// MAC pipe: operands (ReadIM) are generally needed one cycle later, while
// accumulator operands (ReadIMA) are needed two cycles later. Both advances
// apply only to the integer writes listed.
def : ReadAdvance<ReadIM, 1,
                  [WriteImm, WriteI,
                   WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64,
                   WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadIMA, 2,
                  [WriteImm, WriteI,
                   WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64,
                   WriteIM32, WriteIM64]>;

// ReadID (presumably the divide operands -- confirm against the shared
// AArch64 schedule definitions) is advanced by one cycle for the same writes.
def : ReadAdvance<ReadID, 1,
                  [WriteImm, WriteI,
                   WriteISReg, WriteIEReg, WriteIS,
                   WriteID32, WriteID64,
                   WriteIM32, WriteIM64]>;
236 //===----------------------------------------------------------------------===//
237 // Subtarget-specific InstRW.
// Branch instructions.
//
// instregex performs a prefix match, so an open-ended pattern such as "^B"
// or "^B.*" selects every opcode whose name starts with B. That includes
// BR/BLR, which have their own rules below, and opcodes that are not
// branches at all. The B-family patterns are therefore anchored at both
// ends so that each rule names exactly one opcode and no two rules overlap.
def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;    // unconditional branch
def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;   // branch with link
def : InstRW<[THXT8XWriteBR], (instregex "^Bcc$")>;  // conditional branch
// Compare/test-and-branch: the prefix match is intentional here so that all
// opcodes sharing the prefix are covered.
def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
// Register-indirect branches.
def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;
// Opcodes starting with RET use the dedicated return write.
def : InstRW<[THXT8XWriteRET],
             (instregex "^RET")>;

// The COPY pseudo is scheduled with the generic WriteI class.
def : InstRW<[WriteI],
             (instrs COPY)>;
// LD1 family. Each form is listed next to its _POST variant; the _POST
// variant uses the same load write plus WriteAdr.
def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD1],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr],
             (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// LD2 family.
//
// The _POST rules must match only the _POST opcodes. An optional suffix
// "(_POST)?" would also match the plain opcodes, overlapping the four rules
// just above and attaching WriteAdr to loads that have no _POST form. The
// suffix is therefore mandatory, as in every other LD/ST family in this
// file.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
// LD3 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. The 2d arrangement of LD3Threev uses a different write from the
// other arrangements.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;

def : InstRW<[THXT8XWriteVLD3],
             (instregex "LD3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr],
             (instregex "LD3Threev(2d)_POST$")>;
// LD4 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. The 2d arrangement of LD4Fourv uses a different write from the
// other arrangements.
def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVLD2],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr],
             (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD5],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr],
             (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;

def : InstRW<[THXT8XWriteVLD4],
             (instregex "LD4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr],
             (instregex "LD4Fourv(2d)_POST$")>;
// ST1 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. The lane, one- and two-register forms use THXT8XWriteVST1; the
// three- and four-register forms use THXT8XWriteVST2.
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST1],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
// ST2 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. ST2Twov is split by arrangement: 8b/4h/2s use THXT8XWriteVST1,
// 16b/8h/4s/2d use THXT8XWriteVST2.
def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVST1],
             (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;

def : InstRW<[THXT8XWriteVST2],
             (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
// ST3 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. The 2d arrangement of ST3Threev uses THXT8XWriteVST2, like the
// lane form; the other arrangements use THXT8XWriteVST3.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVST3],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;

def : InstRW<[THXT8XWriteVST2],
             (instregex "ST3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST3Threev(2d)_POST$")>;
// ST4 family. Each form is listed next to its _POST variant, which adds
// WriteAdr. The 2d arrangement of ST4Fourv uses THXT8XWriteVST2, like the
// lane form; the other arrangements use THXT8XWriteVST3.
def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4i(8|16|32|64)_POST$")>;

def : InstRW<[THXT8XWriteVST3],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr],
             (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;

def : InstRW<[THXT8XWriteVST2],
             (instregex "ST4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr],
             (instregex "ST4Fourv(2d)_POST$")>;
// Floating-point multiply-accumulate, divide and square root overrides.

// Multiply-accumulate.
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC],
             (instregex "^FML(A|S).*")>;

// Divide: the two named register-register opcodes first, then the opcodes
// starting with FDIVv, split by a trailing 32 or 64.
def : InstRW<[THXT8XWriteFDivSP],
             (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP],
             (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP],
             (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP],
             (instregex "^FDIVv.*64$")>;

// Square root, split by a trailing 32 or 64.
// NOTE(review): these patterns match any opcode whose name contains SQRT and
// ends in 32/64, not only names that begin with FSQRT -- confirm that every
// opcode matched is meant to take the square-root timing.
def : InstRW<[THXT8XWriteFSqrtSP],
             (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP],
             (instregex "^.*SQRT.*64$")>;