]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
Update vis(3) the latest from NetBSD.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / ARM / ARMScheduleR52.td
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order, pipelined, superscalar microprocessor with
16 // an 8-stage pipeline. It can issue at most two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL  and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
22 def CortexR52Model : SchedMachineModel {
23   let MicroOpBufferSize = 0;  // R52 is in-order processor
24   let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
25   let LoadLatency = 1;        // Optimistic, assuming no misses
26   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
27   let PostRAScheduler = 1;    // Enable PostRA scheduler pass.
28   let CompleteModel = 0;      // Covers instructions applicable to cortex-r52.
29 }
30
31
32 //===----------------------------------------------------------------------===//
33 // Define each kind of processor resource and number available.
34
35 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36 // Cortex-R52 is an in-order processor.
37
38 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
39 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
40 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division
41 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
42 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
43 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
44 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL
45 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV
46
47 // Cortex-R52 specific SchedReads
48 def R52Read_ISS   : SchedRead;
49 def R52Read_EX1   : SchedRead;
50 def R52Read_EX2   : SchedRead;
51 def R52Read_WRI   : SchedRead;
52 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
53 def R52Read_F1    : SchedRead;
54 def R52Read_F2    : SchedRead;
55
56
57 //===----------------------------------------------------------------------===//
58 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
59
60 let SchedModel = CortexR52Model in {
61
62 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
63 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
66 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
67
68 // Compares
69 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
71 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
72
73 // Multiply - aliased to sub-target specific later
74
75 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
76 def : WriteRes<WriteDIV, [R52UnitDiv]> {
77   let Latency = 8; let ResourceCycles = [8]; // non-pipelined
78 }
79
80 // Branches  - LR written in Late EX2
81 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
82 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
83 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
84
85 // Misc
86 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
87
88 // Integer pipeline by-passes
89 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
90 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
91 def : ReadAdvance<ReadMUL, 0>;
92 def : ReadAdvance<ReadMAC, 0>;
93
94 // Floating-point. Map target-defined SchedReadWrites to subtarget
95 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
96
97 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
98   let Latency = 6;
99 }
100
101 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
102   let Latency = 11;     // as it is internally two insns (MUL then ADD)
103 }
104
105 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
106                               R52UnitFPALU, R52UnitFPALU]> {
107   let Latency = 11;
108 }
109
110 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
111   let Latency = 7;          // FP div takes fixed #cycles
112   let ResourceCycles = [7]; // is not pipelined
113 }
114
115 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
116   let Latency = 17;
117   let ResourceCycles = [17];
118 }
119
120 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
121 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
122
123 // Overridden via InstRW for this processor.
124 def : WriteRes<WriteVST1, []>;
125 def : WriteRes<WriteVST2, []>;
126 def : WriteRes<WriteVST3, []>;
127 def : WriteRes<WriteVST4, []>;
128
129 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
130 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
131
132 //===----------------------------------------------------------------------===//
133 // Subtarget-specific SchedReadWrites.
134
135 // Forwarding information - based on when an operand is read
136 def : ReadAdvance<R52Read_ISS, 0>;
137 def : ReadAdvance<R52Read_EX1, 1>;
138 def : ReadAdvance<R52Read_EX2, 2>;
139 def : ReadAdvance<R52Read_F0, 0>;
140 def : ReadAdvance<R52Read_F1, 1>;
141 def : ReadAdvance<R52Read_F2, 2>;
142
143
144 // Cortex-R52 specific SchedWrites for use with InstRW
145 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
146 def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> {
147   let Latency = 4; let NumMicroOps = 0;
148 }
149 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
150   let Latency = 8; let ResourceCycles = [8]; // not pipelined
151 }
152 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
153 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
154 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; }
155 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; }
156 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
157 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
158 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
159
160 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
161 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
162
163 // Alias generics to sub-target specific
164 def : SchedAlias<WriteMUL16, R52WriteMAC>;
165 def : SchedAlias<WriteMUL32, R52WriteMAC>;
166 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
167 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
168 def : SchedAlias<WriteMAC16, R52WriteMAC>;
169 def : SchedAlias<WriteMAC32, R52WriteMAC>;
170 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
171 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
172 def : SchedAlias<WritePreLd, R52WriteLd>;
173 def : SchedAlias<WriteLd, R52WriteLd>;
174 def : SchedAlias<WriteST, R52WriteST>;
175
176 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
177 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
178   let Latency = 4;
179 }
180 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
181 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
182   let Latency = 5;
183 }
184 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
185 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
186   let Latency = 6;
187 }
188 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
189 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
190   let Latency = 6;
191 }
192 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
193   let Latency = 11;     // as it is internally two insns (MUL then ADD)
194 }
195 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
196                                          R52UnitFPALU, R52UnitFPALU]> {
197   let Latency = 11;
198 }
199
200 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
201 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
202
203 //===----------------------------------------------------------------------===//
204 // Floating-point. Map target defined SchedReadWrites to processor specific ones
205 //
206 def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
207 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
208 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
209 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
210
211 //===----------------------------------------------------------------------===//
212 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
213 //
214 def : InstRW<[WriteALU], (instrs COPY)>;
215
216 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
217       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
218       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
219
220 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
221       (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
222       "t2MOVi", "t2MOV_ga_dyn")>;
223 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
224       (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
225 def : InstRW<[R52WriteLd,R52Read_ISS],
226       (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
227
228 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
229
230 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
231       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
232       "(t|t2)UBFX", "(t|t2)SBFX")>;
233
234 // Saturating arithmetic
235 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
236       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
237       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
238       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
239       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
240       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
241       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
242
243 // Parallel arithmetic
244 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
245       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
246       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
247       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
248       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
249
250 // Flag setting.
251 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
252       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
253       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
254       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
255       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
256       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
257       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
258
259 // Sum of Absolute Difference
260 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
261       (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
262
263 // Integer Multiply
264 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
265       (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
266       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
267       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
268       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
269
270 // Multiply Accumulate
271 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
272 // The store pipeline is used partly for 64-bit operations.
273 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
274       (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
275       "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
276       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
277       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
278       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
279       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
280       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
281       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
282       "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
283       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
284       "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
285       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
286       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
287
288 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
289       (instregex "t2SDIV", "t2UDIV")>;
290
291 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
292 // However, that's non-trivial to specify, so we keep it uniform
293 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
294       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
295       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
296       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
297       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
298       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
299       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
300 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
301       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
302       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
303       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
304       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
305       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
306       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
307
308 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
309 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
310
311 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
312       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
313       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
314       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
315 // Register-register ALU and CRC forms: R52WriteALU_EX2, both sources R52Read_EX1.
316 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
317       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
318       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
319 // ALU ops on "rsi" opcode forms: first source R52Read_EX1, second R52Read_ISS.
320 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
321       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
322       "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
323 // ALU ops on "rsr" opcode forms: first source R52Read_EX1, the other two R52Read_ISS.
324 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
325       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
326       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
327
328 def : InstRW<[R52WriteALU_EX1],
329     (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
330
331 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
332 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
333       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
334
335 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
336 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
337 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
338 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
339
340 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
341       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
342
343 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
344
345 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
346 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
347
348 // Integer Load, Multiple.
349 foreach Lat = 3-25 in {
350   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
351     let Latency = Lat;
352   }
353   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
354     let Latency = Lat;
355     let NumMicroOps = 0;
356   }
357 }
358 foreach NAddr = 1-16 in {
359   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
360 }
361 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
362 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
363 def R52WriteILDM : SchedWriteVariant<[ // one SchedVar per LDM address count (2-16)
364     SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
365
366     SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
367                                  R52WriteILDM6Cy]>,
368     SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
369                                  R52WriteILDM6Cy, R52WriteILDM7Cy]>,
370
371     SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
372                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
373                                  R52WriteILDM8Cy]>,
374     SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
375                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
376                                  R52WriteILDM8Cy, R52WriteILDM9Cy]>,
377
378     SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
379                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
380                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
381                                  R52WriteILDM10Cy]>,
382     SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
383                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
384                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
385                                  R52WriteILDM10Cy, R52WriteILDM11Cy]>,
386
387     SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
388                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
389                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
390                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
391                                  R52WriteILDM12Cy]>,
392     SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
393                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
394                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
395                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
396                                  R52WriteILDM12Cy, R52WriteILDM13Cy]>,
397
398     SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
399                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
400                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
401                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
402                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
403                                  R52WriteILDM14Cy]>,
404     SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
405                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
406                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
407                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
408                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
409                                  R52WriteILDM14Cy, R52WriteILDM15Cy]>,
410
411     SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
412                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
413                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
414                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
415                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
416                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
417                                  R52WriteILDM16Cy]>,
418     SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
419                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
420                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
421                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
422                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
423                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
424                                  R52WriteILDM16Cy, R52WriteILDM17Cy]>,
425
426     SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
427                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
428                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
429                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
430                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
431                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
432                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
433                                  R52WriteILDM18Cy]>,
434     SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
435                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
436                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
437                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
438                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
439                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
440                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
441                                  R52WriteILDM18Cy, R52WriteILDM19Cy]>,
442     // NOTE(review): fallback ends with resource-using 18Cy/19Cy, not CyNo - confirm.
443 // Unknown number of registers, just use resources for two registers.
444     SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
445                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
446                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
447                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
448                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
449                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
450                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
451                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>
452 ]> { let Variadic=1; }
453
454 // Integer Store, Multiple
455 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
456   let Latency = 4;
457   let NumMicroOps = 2;
458 }
459 foreach NumAddr = 1-16 in {
460   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
461 }
462 def R52WriteISTM : SchedWriteVariant<[ // R52WriteISTM<N> chosen by the matching R52ILDMAddr<N>Pred
463     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
464     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
465     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
466     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
467     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
468     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
469     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
470     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
471     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
472     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
473     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
474     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
475     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
476     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
477     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
478     // Unknown number of registers, just use resources for two registers.
479     SchedVar<NoSchedPred,      [R52WriteISTM2]>
480 ]>;
481
482 def : InstRW<[R52WriteILDM, R52Read_ISS],
483       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
484       "(t|sys)LDM(IA|DA|DB|IB)$")>;
485 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
486       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
487 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
488         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
489
490 // Integer Store, Single Element
491 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
492       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
493       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
494       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
495
496 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
497       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
498       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
499       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
500       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
501
502 // Integer Store, Dual
503 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
504     (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
505 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
506     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
507
508 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
509     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
510 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
511     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
512     "PUSH", "tPUSH")>;
513
514 // LDRLIT pseudo instructions, they expand to LDR + PICADD
515 def : InstRW<[R52WriteLd],
516       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
517 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
518 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
519
520
521
522 //===----------------------------------------------------------------------===//
523 // VFP, Floating Point Support
524 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
525 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
526
527 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
528 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
529 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
530
531 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
532 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
533
534 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
535 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
536
537 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
538 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
539
540
541 //===----------------------------------------------------------------------===//
542 // Neon Support
543
544 // vector multiple load stores
545 foreach NumAddr = 1-16 in {
546   def R52LMAddrPred#NumAddr :
547     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
548 }
549 foreach Lat = 1-32 in {
550   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
551     let Latency = Lat;
552   }
553 }
554 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
555   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
556     let Latency = 0;
557     let NumMicroOps = Num;
558     let ResourceCycles = [Num];
559   }
560 }
561 def R52WriteVLDM : SchedWriteVariant<[ // variant chosen by MI operand count (R52LMAddrPred<N>)
562   // 1 D reg
563   SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
564                               R52ReserveLd5Cy]>,
565   SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
566                               R52ReserveLd5Cy]>,
567
568   // 2 D reg
569   SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
570                               R52ReserveLd6Cy]>,
571   SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
572                               R52ReserveLd6Cy]>,
573
574   // 3 D reg
575   SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
576                               R52WriteLM7Cy,
577                               R52ReserveLd7Cy]>,
578   SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
579                               R52WriteLM7Cy,
580                               R52ReserveLd7Cy]>,
581
582   // 4 D reg
583   SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
584                               R52WriteLM7Cy, R52WriteLM8Cy,
585                               R52ReserveLd8Cy]>,
586   SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
587                               R52WriteLM7Cy, R52WriteLM8Cy,
588                               R52ReserveLd8Cy]>,
589
590   // 5 D reg
591   SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
592                               R52WriteLM7Cy, R52WriteLM8Cy,
593                               R52WriteLM9Cy,
594                               R52ReserveLd9Cy]>,
595   SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
596                               R52WriteLM7Cy, R52WriteLM8Cy,
597                               R52WriteLM9Cy,
598                               R52ReserveLd9Cy]>,
599
600   // 6 D reg
601   SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
602                               R52WriteLM7Cy, R52WriteLM8Cy,
603                               R52WriteLM9Cy, R52WriteLM10Cy,
604                               R52ReserveLd10Cy]>,
605   SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
606                               R52WriteLM7Cy, R52WriteLM8Cy,
607                               R52WriteLM9Cy, R52WriteLM10Cy,
608                               R52ReserveLd10Cy]>,
609
610   // 7 D reg
611   SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
612                               R52WriteLM7Cy, R52WriteLM8Cy,
613                               R52WriteLM9Cy, R52WriteLM10Cy,
614                               R52WriteLM11Cy,
615                               R52ReserveLd11Cy]>,
616   SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
617                               R52WriteLM7Cy, R52WriteLM8Cy,
618                               R52WriteLM9Cy, R52WriteLM10Cy,
619                               R52WriteLM11Cy,
620                               R52ReserveLd11Cy]>,
621
622   // 8 D reg
623   SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
624                               R52WriteLM7Cy, R52WriteLM8Cy,
625                               R52WriteLM9Cy, R52WriteLM10Cy,
626                               R52WriteLM11Cy, R52WriteLM12Cy,
627                               R52ReserveLd12Cy]>,
628   SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
629                               R52WriteLM7Cy, R52WriteLM8Cy,
630                               R52WriteLM9Cy, R52WriteLM10Cy,
631                               R52WriteLM11Cy, R52WriteLM12Cy,
632                               R52ReserveLd12Cy]>,
633   // unknown number of reg.
634   SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
635                               R52WriteLM7Cy, R52WriteLM8Cy,
636                               R52WriteLM9Cy, R52WriteLM10Cy,
637                               R52WriteLM11Cy, R52WriteLM12Cy,
638                               R52ReserveLd5Cy]>
639 ]> { let Variadic=1;}
640
641 // Variable-length stores; R52WriteSTM selects one of these per register-count predicate. Cannot dual-issue.
642 def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: R52LMAddrPred1/Pred2
643   let Latency = 5;
644   let NumMicroOps = 2;
645   let ResourceCycles = [1];  // single entry, paired with the single listed resource R52UnitLd
646 }
647 def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred3/Pred4 and the NoSchedPred fallback
648   let Latency = 6;
649   let NumMicroOps = 4;
650   let ResourceCycles = [2];
651 }
652 def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred5/Pred6
653   let Latency = 7;
654   let NumMicroOps = 6;
655   let ResourceCycles = [3];
656 }
657 def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred7/Pred8
658   let Latency = 8;
659   let NumMicroOps = 8;
660   let ResourceCycles = [4];
661 }
662 def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred9/Pred10
663   let Latency = 9;
664   let NumMicroOps = 10;
665   let ResourceCycles = [5];
666 }
667 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred11/Pred12
668   let Latency = 10;
669   let NumMicroOps = 12;
670   let ResourceCycles = [6];
671 }
672 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred13/Pred14
673   let Latency = 11;
674   let NumMicroOps = 14;
675   let ResourceCycles = [7];
676 }
677 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM: Pred15/Pred16
678   let Latency = 12;
679   let NumMicroOps = 16;
680   let ResourceCycles = [8];
681 }
682 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {  // not referenced by the R52WriteSTM variant
683   let Latency = 13;
684   let NumMicroOps = 18;
685   let ResourceCycles = [9];
686 }
687 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {  // not referenced by the R52WriteSTM variant
688   let Latency = 14;
689   let NumMicroOps = 20;
690   let ResourceCycles = [10];
691 }
692 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {  // not referenced by the R52WriteSTM variant
693   let Latency = 15;
694   let NumMicroOps = 22;
695   let ResourceCycles = [11];
696 }
697
698 def R52WriteSTM : SchedWriteVariant<[  // maps each R52LMAddrPredN to an R52WriteSTMn; consecutive predicate pairs share one write
699   SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
700   SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
701   SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
702   SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
703   SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
704   SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
705   SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
706   SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
707   SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
708   SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
709   SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
710   SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
711   SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
712   SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
713   SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
714   SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,  // highest predicate listed; R52WriteSTM13..15 are never selected here
715   // unknown number of registers, just use resources for two. NOTE(review): R52WriteSTM6 is also the Pred3/Pred4 choice (ResourceCycles = [2]) - confirm intent
716   SchedVar<NoSchedPred,      [R52WriteSTM6]>
717 ]>;
718
719 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
720 // another instruction in slot-1, but only in the last issue.
721 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}  // only Latency is overridden for WriteVLD1
722 def : WriteRes<WriteVLD2, [R52UnitLd]> {  // WriteVLD2..4: each step adds 1 to Latency, 2 to NumMicroOps, 1 R52UnitLd cycle
723   let Latency = 6;
724   let NumMicroOps = 3;
725   let ResourceCycles = [2];
726   let SingleIssue = 1;  // set on WriteVLD2..4 but not on WriteVLD1
727 }
728 def : WriteRes<WriteVLD3, [R52UnitLd]> {
729   let Latency = 7;
730   let NumMicroOps = 5;
731   let ResourceCycles = [3];
732   let SingleIssue = 1;
733 }
734 def : WriteRes<WriteVLD4, [R52UnitLd]> {
735   let Latency = 8;
736   let NumMicroOps = 7;
737   let ResourceCycles = [4];
738   let SingleIssue = 1;
739 }
740 def R52WriteVST1Mem  : SchedWriteRes<[R52UnitLd]> {  // used by VST1d and the VST1LN/VST2LN InstRW entries
741   let Latency = 5;
742   let NumMicroOps = 1;
743   let ResourceCycles = [1];
744 }
745 def R52WriteVST2Mem  : SchedWriteRes<[R52UnitLd]> {  // used by VST1q, VST2(d|b) and the VST3LN/VST4LN InstRW entries
746   let Latency = 6;
747   let NumMicroOps = 3;
748   let ResourceCycles = [2];
749 }
750 def R52WriteVST3Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST1d..T and VST1d64TPseudo InstRW entries
751   let Latency = 7;
752   let NumMicroOps = 5;
753   let ResourceCycles = [3];
754 }
755 def R52WriteVST4Mem  : SchedWriteRes<[R52UnitLd]> {  // used by VST1d..Q, VST2q and the full VST3 InstRW entries
756   let Latency = 8;
757   let NumMicroOps = 7;
758   let ResourceCycles = [4];
759 }
760 def R52WriteVST5Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the full VST4 InstRW entries
761   let Latency = 9;
762   let NumMicroOps = 9;
763   let ResourceCycles = [5];
764 }
765
766 // VABA/VABAL: three R52Read_F1 operands; 64-bit VABA types use R52WriteFPALU_F5, 128-bit VABA and VABAL use R52Write2FPALU_F5.
767 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
768 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
769 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
770 // VABD (64-bit and 128-bit types) and VABDL; VABDL yields long results, so it uses the same type list as VABAL.
771 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
773 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
774 // VABS, VADD/VSUB families, bitwise logic: 64-bit/d patterns use R52WriteFPALU_*; 128-bit/q, HN and L/W patterns use R52Write2FPALU_*.
775 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
776
777 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
778                                (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
779 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
780                                 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
781 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
782                                (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
783
784 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
785                                             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
786
787 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
788 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
789 // VBIC (immediate) and VBIF/VBIT/VBSL; each register width is mapped exactly once.
790 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
791 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
792
793 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
794 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
797 // Remaining NEON/VFP opcode families; where d and q forms are split, d uses R52WriteFP* and q uses R52Write2FP*.
798 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
799       (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
800 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
801       (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
802
803 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
804 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
805 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
806 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
807
808 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
809 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
810
811 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
812 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
813
814 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
815 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
816
817 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
818 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
819 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
820 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
821 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
822 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
823 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
824 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
825 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
826 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
827                   (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
828 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
829                   (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
830 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
831 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
832 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
833                  (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
834 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
835 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
836
837 //---
838 // VSTx. Vector Stores. Each entry lists an R52WriteVSTnMem write plus R52Read_ISS and R52Read_F2; wb/_UPD/WB opcodes also list R52WriteAdr.
839 //---
840 // 1-element structure store: VST1d -> VST1Mem, VST1q -> VST2Mem, T forms -> VST3Mem, Q forms -> VST4Mem
841 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
842 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
843 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
844 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
845 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
846 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
847 // VST1LN* forms: all use R52WriteVST1Mem
848 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
849 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
850 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
851 // wb/WB forms of the entries above, with R52WriteAdr added (these patterns carry no trailing `$`)
852 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
853 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
854 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
855 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
856 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
857 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
858 // VST1LN* _UPD/WB forms
859 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
860 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
861 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
862
863 // 2-element structure store: VST2(d|b) -> VST2Mem, VST2q -> VST4Mem, VST2LN* -> VST1Mem
864 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
865 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
866 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
867 // VST2LN* forms (the q patterns list only 16 and 32)
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
869 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
870 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
871 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
872 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
873 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
874 // wb/WB forms of the full VST2 entries, with R52WriteAdr added
875 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
876 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
877 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
878 // VST2LN* _UPD/WB forms
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
880 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
881 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
882 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
883 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
884 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
885
886 // 3-element structure store: full VST3 -> VST4Mem, VST3LN* -> VST2Mem. NOTE(review): the non-_UPD pseudo pattern is d-only while the _UPD one is (d|q) - confirm.
887 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
888 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
889 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
890 // VST3LN* forms (the q patterns list only 16 and 32)
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
892 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
893 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
894 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
895 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
896 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
897 // _UPD/WB forms of the full VST3 entries, with R52WriteAdr added
898 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
899 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
900 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
901 // VST3LN* _UPD/WB forms
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
903 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
904 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
905 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
906 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
907 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
908
909 // 4-element structure store: full VST4 -> VST5Mem, VST4LN* -> VST2Mem. NOTE(review): the non-_UPD pseudo pattern is "VST4d...Pseudo$" while the _UPD one is (d|q)/(oddP|P) - confirm.
910 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
911 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
912 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
913 // VST4LN* forms (the q patterns list only 16 and 32)
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
915 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
916 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
917 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
918 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
919 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
920 // _UPD/WB forms of the full VST4 entries, with R52WriteAdr added
921 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
922 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
923 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
924 // VST4LN* _UPD/WB forms
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
926 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
927 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
928 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
929 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
930 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
931
932 } // R52 SchedModel