]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
MFV r329766: 8962 zdb should work on non-idle pools
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / ARM / ARMScheduleR52.td
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
22 def CortexR52Model : SchedMachineModel { // Top-level machine model for Cortex-R52
23   let MicroOpBufferSize = 0;  // No micro-op buffer: R52 is an in-order processor
24   let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
25   let LoadLatency = 1;        // Optimistic, assuming no misses. NOTE(review): R52WriteLd below uses Latency = 4 - confirm both are intended
26   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
27   let CompleteModel = 0;      // Model is not marked complete; it covers instructions applicable to Cortex-R52.
28 }
29
30
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available.
33
34 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
35 // Cortex-R52 is an in-order processor.
36
37 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU (2 units)
38 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
39 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division (also named by the WriteFPDIV*/WriteFPSQRT* WriteRes defs below)
40 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
41 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
42 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU (2 units)
43 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL (2 units)
44 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV. NOTE(review): the FP div/sqrt WriteRes defs below use R52UnitDiv, not this unit - confirm intent
45
46 // Cortex-R52 specific SchedReads
47 def R52Read_ISS   : SchedRead; // operand read in ISS (ReadAdvance 0 below)
48 def R52Read_EX1   : SchedRead; // operand read in EX1 (ReadAdvance 1 below)
49 def R52Read_EX2   : SchedRead; // operand read in EX2 (ReadAdvance 2 below)
50 def R52Read_WRI   : SchedRead; // NOTE(review): no ReadAdvance for this read appears next to the others below - confirm
51 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
52 def R52Read_F1    : SchedRead; // FP operand read in F1 (ReadAdvance 1 below)
53 def R52Read_F2    : SchedRead; // FP operand read in F2 (ReadAdvance 2 below)
54
55
56 //===----------------------------------------------------------------------===//
57 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59 let SchedModel = CortexR52Model in {
60
61 // ALU - Write occurs in Late EX2 (independent of whether shift was required), so all four forms share Latency = 3
62 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
66
67 // Compares - occupy an ALU but are modeled with zero result latency
68 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
71
72 // Multiply - aliased to sub-target specific later
73
74 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
75 def : WriteRes<WriteDIV, [R52UnitDiv]> {
76   let Latency = 8; let ResourceCycles = [8]; // non-pipelined: the divider is held for the full 8 cycles
77 }
78
79 // Branches  - LR written in Late EX2. NOTE(review): WriteBrL is given Latency = 0 here - confirm
80 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
82 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; } // table branch is assigned to an ALU, not R52UnitB
83
84 // Misc
85 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } // no resource, no micro-op
86
87 // Integer pipeline by-passes
88 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
89 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
90 def : ReadAdvance<ReadMUL, 0>;   // Advance 0, same as the ISS-stage reads
91 def : ReadAdvance<ReadMAC, 0>;   // Advance 0, same as the ISS-stage reads
92
93 // Floating-point. Map target-defined SchedReadWrites to subtarget
94 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
95
96 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> { // 64-bit form names the FP MUL resource twice
97   let Latency = 6;
98 }
99
100 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
101   let Latency = 11;     // as it is internally two insns (MUL then ADD)
102 }
103
104 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
105                               R52UnitFPALU, R52UnitFPALU]> {
106   let Latency = 11;     // same latency as the 32-bit MAC, with each resource named twice
107 }
108
109 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
110   let Latency = 7;          // FP div takes fixed #cycles
111   let ResourceCycles = [7]; // is not pipelined
112 }
113
114 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
115   let Latency = 17;          // fixed latency, as for the 32-bit divide
116   let ResourceCycles = [17]; // not pipelined: unit held for the whole operation
117 }
118
119 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; } // NOTE(review): no ResourceCycles, unlike WriteFPDIV32 - confirm
120 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; } // NOTE(review): no ResourceCycles, unlike WriteFPDIV64 - confirm
121
122 // Overridden via InstRW for this processor.
123 def : WriteRes<WriteVST1, []>;
124 def : WriteRes<WriteVST2, []>;
125 def : WriteRes<WriteVST3, []>;
126 def : WriteRes<WriteVST4, []>;
127
128 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
129 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
130
131 //===----------------------------------------------------------------------===//
132 // Subtarget-specific SchedReadWrites.
133
134 // Forwarding information - based on when an operand is read (advance = number of cycles after ISS/F0)
135 def : ReadAdvance<R52Read_ISS, 0>; // read in ISS
136 def : ReadAdvance<R52Read_EX1, 1>; // read in EX1
137 def : ReadAdvance<R52Read_EX2, 2>; // read in EX2
138 def : ReadAdvance<R52Read_F0, 0>;  // F0 maps to ISS
139 def : ReadAdvance<R52Read_F1, 1>;  // read in F1
140 def : ReadAdvance<R52Read_F2, 2>;  // read in F2
141
142
143 // Cortex-R52 specific SchedWrites for use with InstRW (and as SchedAlias targets below)
144 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
145 def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> { // aliased from WriteMUL64Hi/WriteMAC64Hi below
146   let Latency = 4; let NumMicroOps = 0; // contributes no extra micro-op
147 }
148 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
149   let Latency = 8; let ResourceCycles = [8]; // not pipelined
150 }
151 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
152 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } // stores share the load/store unit
153 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; } // no resource; listed as the extra write on PRE/POST/_UPD forms below
154 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; } // no resource; used by the compare/test InstRWs below
155 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; } // suffix names the stage; latency grows by one per stage
156 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
157 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
158
159 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } // same latency as R52WriteALU_EX2, no resource
160 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } // same latency as R52WriteALU_WRI, no resource
161
162 // Alias generics to sub-target specific: all multiplies/MACs use the single MAC unit
163 def : SchedAlias<WriteMUL16, R52WriteMAC>;
164 def : SchedAlias<WriteMUL32, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
166 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>; // Hi half: zero-micro-op write
167 def : SchedAlias<WriteMAC16, R52WriteMAC>;
168 def : SchedAlias<WriteMAC32, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
170 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>; // Hi half: zero-micro-op write
171 def : SchedAlias<WritePreLd, R52WriteLd>;      // preload is modeled like a load
172 def : SchedAlias<WriteLd, R52WriteLd>;
173 def : SchedAlias<WriteST, R52WriteST>;
174
175 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } // _F<n> variants have Latency = n + 1
176 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { // "2" variants name the resource twice
177   let Latency = 4;
178 }
179 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
180 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
181   let Latency = 5;
182 }
183 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
184 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
185   let Latency = 6;
186 }
187 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
188 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
189   let Latency = 6;
190 }
191 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
192   let Latency = 11;     // as it is internally two insns (MUL then ADD)
193 }
194 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
195                                          R52UnitFPALU, R52UnitFPALU]> {
196   let Latency = 11;     // same latency as R52WriteFPMAC_F5, each resource named twice
197 }
198
199 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } // FP load on the integer load/store unit
200 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } // FP store on the integer load/store unit
201
202 //===----------------------------------------------------------------------===//
203 // Floating-point. Map target-defined SchedReadWrites to processor-specific ones
204 //
205 def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
206 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;   // moves use the F3 write (Latency = 4); the others use F5 (Latency = 6)
207 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
208 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>; // 64-bit ALU ops use the same write as 32-bit
209
210 //===----------------------------------------------------------------------===//
211 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212 //
213 def : InstRW<[WriteALU], (instrs COPY)>; // COPY is scheduled as a generic ALU write
214
215 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], // sign/zero extends
216       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
217       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
218
219 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], // immediate / global-address moves
220       (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
221       "t2MOVi", "t2MOV_ga_dyn")>;
222 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
223       (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
224 def : InstRW<[R52WriteLd,R52Read_ISS], // the _ldr forms are scheduled as loads
225       (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
226
227 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
228
229 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS], // bit-field clear/insert/extract
230       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
231       "(t|t2)UBFX", "(t|t2)SBFX")>;
232
233 // Saturating arithmetic - result available one stage later (WRI) than plain ALU ops
234 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
235       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
236       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
237       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
238       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
239       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
240       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
241
242 // Parallel arithmetic
243 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
244       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
245       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
246       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
247       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
248
249 // Flag setting. NOTE(review): the opcodes listed are halving add/sub and extend-and-add forms - confirm this label
250 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
251       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
252       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
253       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
254       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
255       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
256       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
257
258 // Sum of Absolute Difference - all three operands read in ISS, result in WRI
259 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
260       (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
261
262 // Integer Multiply - MAC unit, both operands read in ISS
263 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
264       (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
265       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
266       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
267       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
268
269 // Multiply Accumulate
270 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
271 // The store pipeline is used partly for 64-bit operations.
272 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
273       (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
274       "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
275       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
276       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
277       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
278       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
279       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
280       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
281       "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT", // was "MLALBB": no opcode starts with that name
282       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
283       "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB", // was "t2MLALBB"
284       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
285       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
286
287 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], // integer divide; only t2-prefixed opcodes are listed - NOTE(review): confirm ARM-mode SDIV/UDIV are covered elsewhere
288       (instregex "t2SDIV", "t2UDIV")>;
289
290 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
291 // However, that's non-trivial to specify, so we keep it uniform
292 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
293       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
294       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
295       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
296       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
297       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
298       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
299 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS], // pre/post-indexed and T-variant loads: extra R52WriteAdr write
300       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
301       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
302       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
303       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
304       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
305       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
306
307 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>; // register-shifted moves
308 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;       // MOVT: operand read in EX2, result in WRI
309 // Data-processing ops grouped by operand form (ri, rr, rsi, rsr); all produce their result in EX2.
310 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
311       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
312       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
313       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
314
315 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
316       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr", // "CRC", not "CRC*": '*' is a regex quantifier, not a glob
317       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
318
319 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi", // shifted operand read in ISS
320       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
321       "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
322
323 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS], // shifted operand and shift amount read in ISS
324       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
325       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
326
327 def : InstRW<[R52WriteALU_EX1], // moves/ADR with no register read listed. NOTE(review): "MOVST?i16*" ends in a regex '*' quantifier - confirm the intended pattern
328     (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
329
330 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>; // shift by immediate
331 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],                    // shift by register
332       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
333
334 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>; // compares write only R52WriteCC (no resource, Latency 0)
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
336 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
337 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
338
339 def : InstRW<[R52WriteALU_EX2, R52Read_ISS], // bit/byte reversal and RRX. NOTE(review): "t2LDC" is also listed here - confirm
340       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
341
342 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
343
344 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;              // status-register reads are scheduled on the load/store unit
345 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; // status-register writes likewise
346
347 // Integer Load, Multiple. Helper writes and predicates used by R52WriteILDM below.
348 foreach Lat = 3-25 in {
349   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { // occupies the load/store unit, result after Lat cycles
350     let Latency = Lat;
351   }
352   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {         // same latency, but no resource and no micro-op
353     let Latency = Lat;
354     let NumMicroOps = 0;
355   }
356 }
357 foreach NAddr = 1-16 in {
358   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>; // true when the instruction has exactly NAddr LDM/STM addresses
359 }
360 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
361 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>; // Latency left at the SchedWriteRes default
362 def R52WriteILDM : SchedWriteVariant<[ // one variant per address count N (2..16); the k-th write has latency 3+k
363     SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
364
365     SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
366                                  R52WriteILDM6Cy]>,
367     SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
368                                  R52WriteILDM6Cy, R52WriteILDM7Cy]>,
369
370     SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
371                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
372                                  R52WriteILDM8Cy]>,
373     SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
374                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
375                                  R52WriteILDM8Cy, R52WriteILDM9Cy]>,
376
377     SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
378                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
379                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
380                                  R52WriteILDM10Cy]>,
381     SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
382                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
383                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
384                                  R52WriteILDM10Cy, R52WriteILDM11Cy]>,
385
386     SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
387                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
388                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
389                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
390                                  R52WriteILDM12Cy]>,
391     SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
392                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
393                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
394                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
395                                  R52WriteILDM12Cy, R52WriteILDM13Cy]>,
396
397     SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
398                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
399                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
400                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
401                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
402                                  R52WriteILDM14Cy]>,
403     SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
404                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
405                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
406                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
407                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
408                                  R52WriteILDM14Cy, R52WriteILDM15Cy]>,
409
410     SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
411                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
412                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
413                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
414                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
415                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
416                                  R52WriteILDM16Cy]>,
417     SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
418                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
419                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
420                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
421                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
422                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
423                                  R52WriteILDM16Cy, R52WriteILDM17Cy]>,
424
425     SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
426                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
427                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
428                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
429                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
430                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
431                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
432                                  R52WriteILDM18Cy]>,
433     SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, // 16 addresses: was a second, unreachable Addr15Pred entry
434                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
435                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
436                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
437                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
438                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
439                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
440                                  R52WriteILDM18Cy, R52WriteILDM19Cy]>,
441
442 // Unknown number of registers, just use resources for two registers.
443     SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
444                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
445                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
446                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
447                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
448                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
449                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
450                                 R52WriteILDM18CyNo, R52WriteILDM19CyNo]> // last two were the resource-consuming Cy forms, contradicting the comment above
451 ]> { let Variadic=1; }
452
453 // Integer Store, Multiple. Helper writes used by R52WriteISTM below.
454 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> { // one store step on the load/store unit
455   let Latency = 4;
456   let NumMicroOps = 2;
457 }
458 foreach NumAddr = 1-16 in {
459   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>; // R52WriteIStIncAddr repeated NumAddr times
460 }
461 def R52WriteISTM : SchedWriteVariant<[ // picks R52WriteISTM<N> for N = 2..16 addresses, reusing the LDM address-count predicates
462     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
463     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
464     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
465     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
466     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
467     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
468     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
469     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
470     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
471     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
472     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
473     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
474     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
475     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
476     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
477     // Unknown number of registers, just use resources for two registers.
478     SchedVar<NoSchedPred,      [R52WriteISTM2]>
479 ]>;
480
481 def : InstRW<[R52WriteILDM, R52Read_ISS], // load-multiple, no writeback
482       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
483       "(t|sys)LDM(IA|DA|DB|IB)$")>;
484 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], // _UPD forms add the R52WriteAdr write
485       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
486 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], // return / pop forms, scheduled like the _UPD forms
487         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
488
489 // Integer Store, Single Element. NOTE(review): these use R52WriteLd although R52WriteST is defined above with the same unit and latency - confirm
490 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
491       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
492       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
493       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
494
495 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], // indexed / T-variant stores: extra R52WriteAdr write
496       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
497       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
498       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
499       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
500
501 // Integer Store, Dual (the store-release STL patterns are listed here too)
502 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
503     (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
504 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
505     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
506
507 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2], // store-multiple, no writeback
508     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
509 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2], // _UPD and push forms add the R52WriteAdr write
510     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
511     "PUSH", "tPUSH")>;
512
513 // LDRLIT pseudo instructions, they expand to LDR + PICADD
514 def : InstRW<[R52WriteLd],
515       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
516 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
517 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
518
519
520
521 //===----------------------------------------------------------------------===//
522 // VFP, Floating Point Support. D-register forms use the single-resource writes; Q-register (fq/hq) forms use the "2" writes.
523 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
524 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
525
526 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
527 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
528 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
529
530 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>; // absolute compares use the shorter F3 write
531 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
532
533 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
534 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
535
536 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; // FP load
537 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; // FP store
538
539
540 //===----------------------------------------------------------------------===//
541 // Neon Support
542
543 // vector multiple load stores: helper predicates and writes for R52WriteVLDM below
544 foreach NumAddr = 1-16 in {
545   def R52LMAddrPred#NumAddr : // true when the MachineInstr has exactly NumAddr operands
546     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
547 }
548 foreach Lat = 1-32 in {
549   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { // latency-only write: no resource reserved
550     let Latency = Lat;
551   }
552 }
553 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
554   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
555     let Latency = 0;            // contributes no latency of its own
556     let NumMicroOps = Num;
557     let ResourceCycles = [Num]; // holds R52UnitLd for Num cycles
558   }
559 }
560 def R52WriteVLDM : SchedWriteVariant<[ // keyed on operand count: two counts per D-register count; k D regs reserve R52UnitLd for 4+k cycles
561   // 1 D reg
562   SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
563                               R52ReserveLd5Cy]>,
564   SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
565                               R52ReserveLd5Cy]>,
566
567   // 2 D reg
568   SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
569                               R52ReserveLd6Cy]>,
570   SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
571                               R52ReserveLd6Cy]>,
572
573   // 3 D reg
574   SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
575                               R52WriteLM7Cy,
576                               R52ReserveLd7Cy]>, // was R52ReserveLd4Cy; now matches the other 3-D-reg entry
577   SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
578                               R52WriteLM7Cy,
579                               R52ReserveLd7Cy]>,
580
581   // 4 D reg
582   SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
583                               R52WriteLM7Cy, R52WriteLM8Cy,
584                               R52ReserveLd8Cy]>,
585   SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
586                               R52WriteLM7Cy, R52WriteLM8Cy,
587                               R52ReserveLd8Cy]>,
588
589   // 5 D reg
590   SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
591                               R52WriteLM7Cy, R52WriteLM8Cy,
592                               R52WriteLM9Cy,
593                               R52ReserveLd9Cy]>,
594   SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
595                               R52WriteLM7Cy, R52WriteLM8Cy,
596                               R52WriteLM9Cy,
597                               R52ReserveLd9Cy]>,
598
599   // 6 D reg
600   SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
601                               R52WriteLM7Cy, R52WriteLM8Cy,
602                               R52WriteLM9Cy, R52WriteLM10Cy,
603                               R52ReserveLd10Cy]>,
604   SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
605                               R52WriteLM7Cy, R52WriteLM8Cy,
606                               R52WriteLM9Cy, R52WriteLM10Cy,
607                               R52ReserveLd10Cy]>,
608
609   // 7 D reg
610   SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
611                               R52WriteLM7Cy, R52WriteLM8Cy,
612                               R52WriteLM9Cy, R52WriteLM10Cy,
613                               R52WriteLM11Cy,
614                               R52ReserveLd11Cy]>,
615   SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
616                               R52WriteLM7Cy, R52WriteLM8Cy,
617                               R52WriteLM9Cy, R52WriteLM10Cy,
618                               R52WriteLM11Cy,
619                               R52ReserveLd11Cy]>,
620
621   // 8 D reg (operand counts 15 and 16; the first entry previously repeated Pred14 and was unreachable)
622   SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
623                               R52WriteLM7Cy, R52WriteLM8Cy,
624                               R52WriteLM9Cy, R52WriteLM10Cy,
625                               R52WriteLM11Cy, R52WriteLM12Cy,
626                               R52ReserveLd12Cy]>,
627   SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
628                               R52WriteLM7Cy, R52WriteLM8Cy,
629                               R52WriteLM9Cy, R52WriteLM10Cy,
630                               R52WriteLM11Cy, R52WriteLM12Cy,
631                               R52ReserveLd12Cy]>,
632   // unknown number of reg: latencies for up to 8 D regs, load/store unit reserved as for 1 D reg
633   SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
634                               R52WriteLM7Cy, R52WriteLM8Cy,
635                               R52WriteLM9Cy, R52WriteLM10Cy,
636                               R52WriteLM11Cy, R52WriteLM12Cy,
637                               R52ReserveLd5Cy]>
638 ]> { let Variadic=1;}
639 // R52WriteSTM5..R52WriteSTM15 all use R52UnitLd; each step adds 1 to Latency, 2 to NumMicroOps and 1 to ResourceCycles.
640 // variable stores. Cannot dual-issue
641 def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {  // R52WriteSTM picks this for R52LMAddrPred1/2
642   let Latency = 5;
643   let NumMicroOps = 2;
644   let ResourceCycles = [1];
645 }
646 def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred3/4, and the NoSchedPred fallback
647   let Latency = 6;
648   let NumMicroOps = 4;
649   let ResourceCycles = [2];
650 }
651 def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred5/6
652   let Latency = 7;
653   let NumMicroOps = 6;
654   let ResourceCycles = [3];
655 }
656 def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred7/8
657   let Latency = 8;
658   let NumMicroOps = 8;
659   let ResourceCycles = [4];
660 }
661 def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred9/10
662   let Latency = 9;
663   let NumMicroOps = 10;
664   let ResourceCycles = [5];
665 }
666 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred11/12
667   let Latency = 10;
668   let NumMicroOps = 12;
669   let ResourceCycles = [6];
670 }
671 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred13/14
672   let Latency = 11;
673   let NumMicroOps = 14;
674   let ResourceCycles = [7];
675 }
676 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {  // R52LMAddrPred15/16
677   let Latency = 12;
678   let NumMicroOps = 16;
679   let ResourceCycles = [8];
680 }
681 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {  // not selected by the R52WriteSTM variant
682   let Latency = 13;
683   let NumMicroOps = 18;
684   let ResourceCycles = [9];
685 }
686 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {  // not selected by the R52WriteSTM variant
687   let Latency = 14;
688   let NumMicroOps = 20;
689   let ResourceCycles = [10];
690 }
691 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {  // not selected by the R52WriteSTM variant
692   let Latency = 15;
693   let NumMicroOps = 22;
694   let ResourceCycles = [11];
695 }
696 // R52WriteSTM maps each pair of consecutive R52LMAddrPred<k> predicates to one R52WriteSTM<N> resource.
697 def R52WriteSTM : SchedWriteVariant<[
698   SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
699   SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
700   SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
701   SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
702   SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
703   SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
704   SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
705   SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
706   SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
707   SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
708   SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
709   SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
710   SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
711   SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
712   SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
713   SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
714   // unknown number of registers: fall back to R52WriteSTM6 (ResourceCycles = [2])
715   SchedVar<NoSchedPred,      [R52WriteSTM6]>
716 ]>;
717 // WriteRes entries binding WriteVLD1..WriteVLD4 to R52UnitLd.
718 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
719 // another instruction in slot-1, but only in the last issue.
720 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}  // only Latency is overridden here
721 def : WriteRes<WriteVLD2, [R52UnitLd]> {
722   let Latency = 6;
723   let NumMicroOps = 3;
724   let ResourceCycles = [2];
725   let SingleIssue = 1;  // set for WriteVLD2..4 but not for WriteVLD1
726 }
727 def : WriteRes<WriteVLD3, [R52UnitLd]> {
728   let Latency = 7;
729   let NumMicroOps = 5;
730   let ResourceCycles = [3];
731   let SingleIssue = 1;
732 }
733 def : WriteRes<WriteVLD4, [R52UnitLd]> {
734   let Latency = 8;
735   let NumMicroOps = 7;
736   let ResourceCycles = [4];
737   let SingleIssue = 1;
738 }
739 def R52WriteVST1Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST1d, VST1LN and VST2LN entries
740   let Latency = 5;
741   let NumMicroOps = 1;
742   let ResourceCycles = [1];
743 }
744 def R52WriteVST2Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST1q, VST2(d|b), VST3LN and VST4LN entries
745   let Latency = 6;
746   let NumMicroOps = 3;
747   let ResourceCycles = [2];
748 }
749 def R52WriteVST3Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST1d...T and VST1d64TPseudo entries
750   let Latency = 7;
751   let NumMicroOps = 5;
752   let ResourceCycles = [3];
753 }
754 def R52WriteVST4Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST1d...Q, VST2q and VST3(d|q) entries
755   let Latency = 8;
756   let NumMicroOps = 7;
757   let ResourceCycles = [4];
758 }
759 def R52WriteVST5Mem  : SchedWriteRes<[R52UnitLd]> {  // used by the VST4(d|q) entries
760   let Latency = 9;
761   let NumMicroOps = 9;
762   let ResourceCycles = [5];
763 }
764
765 // InstRW overrides: each entry lists the writes and operand reads for the opcodes matching its instregex.
766 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
767 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
768 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
769 // VABD and VABDL. VABDL uses the same widened type suffixes as the VABAL entry above.
770 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
771 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
773 // VABS: only the v16i8/v8i16/v4i32 opcode names are matched by this entry.
774 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
775 // VADD/VSUB: the first suffix group gets R52WriteFPALU_F4, the second R52Write2FPALU_F4; the *HN forms get R52Write2FPALU_F5.
776 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
777                                (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
778 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
779                                 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
780 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
781                                (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
782 // VADDL/VADDW/VSUBL/VSUBW share one entry (instregex is presumably a prefix match, so all type suffixes are covered -- confirm).
783 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
784                                             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
785 // VAND/VBIC/VEOR: names ending in d get R52WriteFPALU_F3, names ending in q get R52Write2FPALU_F3.
786 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
787 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
788 // VBICi and VBIF/VBIT/VBSL. Each opcode pattern is listed exactly once.
789 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
790 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
791
792 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
793 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
796 // Compare/count patterns share R52WriteFPALU_F3; the VCVT/VSITO/VUITO/VTO patterns use R52WriteFPALU_F5.
797 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
798       (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
799 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
800       (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
801 // VDUP lists R52Read_ISS as its read; VDUPLN lists R52Read_F1.
802 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
803 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
804 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
805 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
806 // VEXT d and q forms; VSEL shares the d-form entry.
807 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
808 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
809 // VFMA/VFMS use the FPMAC writes with three R52Read_F1 reads.
810 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
811 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
812 // VHADD/VHSUB: same suffix split as VADD/VSUB, without the 64-bit element suffixes.
813 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
814 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
815 // Remaining vector opcodes; an instregex with several strings covers opcodes matching any of them.
816 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;  // `$` stops the pattern from matching longer opcode names
817 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
818 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
819 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;  // write only, no reads listed
820 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
821 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
822 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
823 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
824 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
825 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
826                   (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
827 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
828                   (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
829 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
830 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
831 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
832                  (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
833 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
834 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
835
836 //---
837 // VSTx. Vector Stores. Every entry below lists R52Read_ISS and R52Read_F2 as its reads.
838 //---
839 // 1-element structure store
840 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
841 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
842 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
843 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
844 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
845 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
846 // VST1LN forms all use R52WriteVST1Mem.
847 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
848 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
849 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
850 // wb / PseudoWB forms: same store write as above, plus R52WriteAdr as a second write.
851 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
852 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
853 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
854 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
855 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
856 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
857 // VST1LN _UPD / WB forms: R52WriteVST1Mem plus R52WriteAdr.
858 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
859 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
860 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
861 // VST2: (d|b) forms use R52WriteVST2Mem, q forms R52WriteVST4Mem, LN forms R52WriteVST1Mem.
862 // 2-element structure store
863 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
864 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
865 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
866 // VST2LN forms; the q patterns list only 16 and 32.
867 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
869 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
870 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
871 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
872 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
873 // wb / PseudoWB forms: same store write plus R52WriteAdr.
874 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
875 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
876 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
877 // VST2LN _UPD / WB forms: R52WriteVST1Mem plus R52WriteAdr.
878 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
880 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
881 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
882 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
883 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
884 // VST3: (d|q) forms use R52WriteVST4Mem, LN forms R52WriteVST2Mem.
885 // 3-element structure store
886 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
887 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
888 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
889 // NOTE(review): the pseudo pattern above matches only VST3d..., while the _UPD pseudo pattern below uses (d|q) -- confirm the q pseudos are covered.
890 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
892 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
893 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
894 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
895 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
896 // _UPD / WB forms: same store write plus R52WriteAdr.
897 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
898 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
899 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
900 // VST3LN _UPD / WB forms: R52WriteVST2Mem plus R52WriteAdr.
901 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
903 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
904 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
905 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
906 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
907 // VST4: (d|q) forms use R52WriteVST5Mem, LN forms R52WriteVST2Mem.
908 // 4-element structure store
909 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
910 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
911 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
912 // NOTE(review): the pseudo pattern above matches only VST4d...Pseudo, while the _UPD pattern below uses (d|q) and (oddP|P)seudo -- confirm the q pseudos are covered.
913 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
915 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
916 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
917 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
918 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
919 // _UPD / WB forms: same store write plus R52WriteAdr.
920 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
921 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
922 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
923 // VST4LN _UPD / WB forms: R52WriteVST2Mem plus R52WriteAdr.
924 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
926 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
927 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
928 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
929 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
930
931 } // R52 SchedModel