]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
Fix a memory leak in if_delgroups() introduced in r334118.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / ARM / ARMScheduleR52.td
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
10 //
11 //===----------------------------------------------------------------------===//
12
13 // ===---------------------------------------------------------------------===//
14 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
15 // an 8-stage pipeline. It can issue at most two instructions per cycle.
16 // There are two ALUs, one LDST, one MUL  and a non-pipelined integer DIV.
17 // A number of forwarding paths enable results of computations to be input
18 // to subsequent operations before they are written to registers.
19 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
20
21 def CortexR52Model : SchedMachineModel {
22   let MicroOpBufferSize = 0;  // R52 is in-order processor
23   let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
24   let LoadLatency = 1;        // Optimistic, assuming no misses
25   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
26   let CompleteModel = 0;      // Covers instructions applicable to cortex-r52.
27 }
28
29
30 //===----------------------------------------------------------------------===//
31 // Define each kind of processor resource and number available.
32
33 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
34 // Cortex-R52 is an in-order processor.
35
36 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
37 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
38 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division
39 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
40 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
41 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
42 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL
43 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV
44
45 // Cortex-R52 specific SchedReads
46 def R52Read_ISS   : SchedRead;
47 def R52Read_EX1   : SchedRead;
48 def R52Read_EX2   : SchedRead;
49 def R52Read_WRI   : SchedRead;
50 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
51 def R52Read_F1    : SchedRead;
52 def R52Read_F2    : SchedRead;
53
54
55 //===----------------------------------------------------------------------===//
56 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
57
58 let SchedModel = CortexR52Model in {
59
60 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
61 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
62 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
65
66 // Compares
67 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
68 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
70
71 // Multiply - aliased to sub-target specific later
72
73 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
74 def : WriteRes<WriteDIV, [R52UnitDiv]> {
75   let Latency = 8; let ResourceCycles = [8]; // non-pipelined
76 }
77
78 // Branches  - LR written in Late EX2
79 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
80 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
82
83 // Misc
84 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
85
86 // Integer pipeline by-passes
87 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
88 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
89 def : ReadAdvance<ReadMUL, 0>;
90 def : ReadAdvance<ReadMAC, 0>;
91
92 // Floating-point. Map target-defined SchedReadWrites to subtarget
93 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
94
95 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
96   let Latency = 6;
97 }
98
99 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
100   let Latency = 11;     // as it is internally two insns (MUL then ADD)
101 }
102
103 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
104                               R52UnitFPALU, R52UnitFPALU]> {
105   let Latency = 11;
106 }
107
108 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
109   let Latency = 7;          // FP div takes fixed #cycles
110   let ResourceCycles = [7]; // is not pipelined
111 }
112
113 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
114   let Latency = 17;
115   let ResourceCycles = [17];
116 }
117
118 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
119 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
120
121 // Overridden via InstRW for this processor.
122 def : WriteRes<WriteVST1, []>;
123 def : WriteRes<WriteVST2, []>;
124 def : WriteRes<WriteVST3, []>;
125 def : WriteRes<WriteVST4, []>;
126
127 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
128 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
129
130 //===----------------------------------------------------------------------===//
131 // Subtarget-specific SchedReadWrites.
132
133 // Forwarding information - based on when an operand is read
134 def : ReadAdvance<R52Read_ISS, 0>;
135 def : ReadAdvance<R52Read_EX1, 1>;
136 def : ReadAdvance<R52Read_EX2, 2>;
137 def : ReadAdvance<R52Read_F0, 0>;
138 def : ReadAdvance<R52Read_F1, 1>;
139 def : ReadAdvance<R52Read_F2, 2>;
140
141
142 // Cortex-R52 specific SchedWrites for use with InstRW
143 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
144 def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> {
145   let Latency = 4; let NumMicroOps = 0;
146 }
147 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
148   let Latency = 8; let ResourceCycles = [8]; // not pipelined
149 }
150 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
151 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
152 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; }
153 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; }
154 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
155 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
156 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
157
158 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
159 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
160
161 // Alias generics to sub-target specific
162 def : SchedAlias<WriteMUL16, R52WriteMAC>;
163 def : SchedAlias<WriteMUL32, R52WriteMAC>;
164 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
166 def : SchedAlias<WriteMAC16, R52WriteMAC>;
167 def : SchedAlias<WriteMAC32, R52WriteMAC>;
168 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
170 def : SchedAlias<WritePreLd, R52WriteLd>;
171 def : SchedAlias<WriteLd, R52WriteLd>;
172 def : SchedAlias<WriteST, R52WriteST>;
173
174 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
175 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
176   let Latency = 4;
177 }
178 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
179 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
180   let Latency = 5;
181 }
182 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
183 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
184   let Latency = 6;
185 }
186 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
187 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
188   let Latency = 6;
189 }
190 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
191   let Latency = 11;     // as it is internally two insns (MUL then ADD)
192 }
193 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
194                                          R52UnitFPALU, R52UnitFPALU]> {
195   let Latency = 11;
196 }
197
198 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
199 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
200
201 //===----------------------------------------------------------------------===//
202 // Floating-point. Map target defined SchedReadWrites to processor specific ones
203 //
204 def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
205 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
206 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
207 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
208
209 //===----------------------------------------------------------------------===//
210 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
211 //
212 def : InstRW<[WriteALU], (instrs COPY)>;
213
214 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
215       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
216       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
217
218 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
219       (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
220 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
221       (instregex "MOV_ga_pcrel$")>;
222 def : InstRW<[R52WriteLd,R52Read_ISS],
223       (instregex "MOV_ga_pcrel_ldr")>;
224
225 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
226
227 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
228       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
229       "(t|t2)UBFX", "(t|t2)SBFX")>;
230
231 // Saturating arithmetic
232 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
233       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
234       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
235       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
236       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
237       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
238       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
239
240 // Parallel arithmetic
241 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
242       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
243       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
244       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
245       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
246
247 // Flag setting.
248 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
249       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
250       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
251       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
252       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
253       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
254       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
255
256 // Sum of Absolute Difference
257 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
258       (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8") >;
259
260 // Integer Multiply
261 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
262       (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
263       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
264       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
265       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
266
267 // Multiply Accumulate
268 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
269 // The store pipeline is used partly for 64-bit operations.
270 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
271       (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
272       "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
273       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
274       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
275       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
276       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
277       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
278       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
279       "SMLAL", "UMLAL", "SMLALBT",
280       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
281       "UMAAL", "t2SMLAL", "t2UMLAL",
282       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
283       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
284
285 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
286       (instregex "t2SDIV", "t2UDIV")>;
287
288 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
289 // However, that's non-trivial to specify, so we keep it uniform
290 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
291       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
292       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
293       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
294       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
295       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
296       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
297 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
298       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
299       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
300       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
301       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
302       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
303       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
304
305 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
306 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
307
308 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri",
309       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
310       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
311       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
312
313 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
314       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
315       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
316
317 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
318       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
319       "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;
320
321 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
322       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
323       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
324
325 def : InstRW<[R52WriteALU_EX1],
326     (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;
327
328 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
329 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
330       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
331
332 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
333 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
334 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
336
337 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
338       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
339
340 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
341
342 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
343 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
344
345 // Integer Load, Multiple.
346 foreach Lat = 3-25 in {
347   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
348     let Latency = Lat;
349   }
350   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
351     let Latency = Lat;
352     let NumMicroOps = 0;
353   }
354 }
355 foreach NAddr = 1-16 in {
356   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
357 }
358 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
359 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
360 def R52WriteILDM : SchedWriteVariant<[
361     SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
362
363     SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
364                                  R52WriteILDM6Cy]>,
365     SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
366                                  R52WriteILDM6Cy, R52WriteILDM7Cy]>,
367
368     SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
369                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
370                                  R52WriteILDM8Cy]>,
371     SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
372                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
373                                  R52WriteILDM8Cy, R52WriteILDM9Cy]>,
374
375     SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
376                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
377                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
378                                  R52WriteILDM10Cy]>,
379     SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
380                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
381                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
382                                  R52WriteILDM10Cy, R52WriteILDM11Cy]>,
383
384     SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
385                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
386                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
387                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
388                                  R52WriteILDM12Cy]>,
389     SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
390                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
391                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
392                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
393                                  R52WriteILDM12Cy, R52WriteILDM13Cy]>,
394
395     SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
396                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
397                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
398                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
399                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
400                                  R52WriteILDM14Cy]>,
401     SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
402                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
403                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
404                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
405                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
406                                  R52WriteILDM14Cy, R52WriteILDM15Cy]>,
407
408     SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
409                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
410                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
411                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
412                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
413                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
414                                  R52WriteILDM16Cy]>,
415     SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
416                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
417                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
418                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
419                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
420                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
421                                  R52WriteILDM16Cy, R52WriteILDM17Cy]>,
422
423     SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
424                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
425                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
426                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
427                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
428                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
429                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
430                                  R52WriteILDM18Cy]>,
431     SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
432                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
433                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
434                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
435                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
436                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
437                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
438                                  R52WriteILDM18Cy, R52WriteILDM19Cy]>,
439
440 // Unknown number of registers, just use resources for two registers. NOTE(review): the last two writes (18Cy/19Cy) are not the resource-free CyNo form - confirm intent.
441     SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
442                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
443                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
444                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
445                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
446                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
447                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
448                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>
449 ]> { let Variadic=1; }
450
451 // Integer Store, Multiple
452 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
453   let Latency = 4;
454   let NumMicroOps = 2;
455 }
456 foreach NumAddr = 1-16 in {
457   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
458 }
459 def R52WriteISTM : SchedWriteVariant<[
460     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
461     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
462     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
463     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
464     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
465     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
466     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
467     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
468     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
469     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
470     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
471     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
472     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
473     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
474     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
475     // Unknown number of registers, just use resources for two registers.
476     SchedVar<NoSchedPred,      [R52WriteISTM2]>
477 ]>;
478
479 def : InstRW<[R52WriteILDM, R52Read_ISS],
480       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
481       "(t|sys)LDM(IA|DA|DB|IB)$")>;
482 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
483       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
484 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
485         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
486
487 // Integer Store, Single Element
488 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
489       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
490       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
491       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
492
493 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
494       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
495       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
496       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
497       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
498
499 // Integer Store, Dual
500 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
501     (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
502 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
503     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
504
505 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
506     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
507 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
508     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
509     "tPUSH")>;
510
511 // LDRLIT pseudo instructions, they expand to LDR + PICADD
512 def : InstRW<[R52WriteLd],
513       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
514 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
515 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
516
517
518
519 //===----------------------------------------------------------------------===//
520 // VFP, Floating Point Support
521 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
522 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
523
524 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
525 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
526 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
527
528 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
529 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
530
531 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
532 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
533
534 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
535 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
536
537
538 //===----------------------------------------------------------------------===//
539 // Neon Support
540
541 // vector multiple load stores
542 foreach NumAddr = 1-16 in {
543   def R52LMAddrPred#NumAddr :
544     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
545 }
546 foreach Lat = 1-32 in {
547   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
548     let Latency = Lat;
549   }
550 }
551 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
552   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
553     let Latency = 0;
554     let NumMicroOps = Num;
555     let ResourceCycles = [Num];
556   }
557 }
558 def R52WriteVLDM : SchedWriteVariant<[
559   // 1 D reg. In each entry the trailing R52ReserveLd<N>Cy holds R52UnitLd for N cycles.
560   SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
561                               R52ReserveLd5Cy]>,
562   SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
563                               R52ReserveLd5Cy]>,
564
565   // 2 D reg
566   SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
567                               R52ReserveLd6Cy]>,
568   SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
569                               R52ReserveLd6Cy]>,
570
571   // 3 D reg
572   SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
573                               R52WriteLM7Cy,
574                               R52ReserveLd7Cy]>,
575   SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
576                               R52WriteLM7Cy,
577                               R52ReserveLd7Cy]>,
578
579   // 4 D reg
580   SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
581                               R52WriteLM7Cy, R52WriteLM8Cy,
582                               R52ReserveLd8Cy]>,
583   SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
584                               R52WriteLM7Cy, R52WriteLM8Cy,
585                               R52ReserveLd8Cy]>,
586
587   // 5 D reg
588   SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
589                               R52WriteLM7Cy, R52WriteLM8Cy,
590                               R52WriteLM9Cy,
591                               R52ReserveLd9Cy]>,
592   SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
593                               R52WriteLM7Cy, R52WriteLM8Cy,
594                               R52WriteLM9Cy,
595                               R52ReserveLd9Cy]>,
596
597   // 6 D reg
598   SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
599                               R52WriteLM7Cy, R52WriteLM8Cy,
600                               R52WriteLM9Cy, R52WriteLM10Cy,
601                               R52ReserveLd10Cy]>,
602   SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
603                               R52WriteLM7Cy, R52WriteLM8Cy,
604                               R52WriteLM9Cy, R52WriteLM10Cy,
605                               R52ReserveLd10Cy]>,
606
607   // 7 D reg
608   SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
609                               R52WriteLM7Cy, R52WriteLM8Cy,
610                               R52WriteLM9Cy, R52WriteLM10Cy,
611                               R52WriteLM11Cy,
612                               R52ReserveLd11Cy]>,
613   SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
614                               R52WriteLM7Cy, R52WriteLM8Cy,
615                               R52WriteLM9Cy, R52WriteLM10Cy,
616                               R52WriteLM11Cy,
617                               R52ReserveLd11Cy]>,
618
619   // 8 D reg
620   SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
621                               R52WriteLM7Cy, R52WriteLM8Cy,
622                               R52WriteLM9Cy, R52WriteLM10Cy,
623                               R52WriteLM11Cy, R52WriteLM12Cy,
624                               R52ReserveLd12Cy]>,
625   SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
626                               R52WriteLM7Cy, R52WriteLM8Cy,
627                               R52WriteLM9Cy, R52WriteLM10Cy,
628                               R52WriteLM11Cy, R52WriteLM12Cy,
629                               R52ReserveLd12Cy]>,
630   // unknown number of reg.
631   SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
632                               R52WriteLM7Cy, R52WriteLM8Cy,
633                               R52WriteLM9Cy, R52WriteLM10Cy,
634                               R52WriteLM11Cy, R52WriteLM12Cy,
635                               R52ReserveLd5Cy]>
636 ]> { let Variadic=1;}
637
638 // Variable-length stores, all on R52UnitLd. Cannot dual-issue.
639 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {  // each later step: +1 latency, +2 micro-ops, +1 cycle
640   let ResourceCycles = [1];
641   let NumMicroOps = 2;
642   let Latency = 5;
643 }
644 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
645   let ResourceCycles = [2];
646   let NumMicroOps = 4;
647   let Latency = 6;
648 }
649 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
650   let ResourceCycles = [3];
651   let NumMicroOps = 6;
652   let Latency = 7;
653 }
654 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
655   let ResourceCycles = [4];
656   let NumMicroOps = 8;
657   let Latency = 8;
658 }
659 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
660   let ResourceCycles = [5];
661   let NumMicroOps = 10;
662   let Latency = 9;
663 }
664 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
665   let ResourceCycles = [6];
666   let NumMicroOps = 12;
667   let Latency = 10;
668 }
669 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
670   let ResourceCycles = [7];
671   let NumMicroOps = 14;
672   let Latency = 11;
673 }
674 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
675   let ResourceCycles = [8];
676   let NumMicroOps = 16;
677   let Latency = 12;
678 }
679 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
680   let ResourceCycles = [9];
681   let NumMicroOps = 18;
682   let Latency = 13;
683 }
684 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
685   let ResourceCycles = [10];
686   let NumMicroOps = 20;
687   let Latency = 14;
688 }
689 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
690   let ResourceCycles = [11];
691   let NumMicroOps = 22;
692   let Latency = 15;
693 }
694 // Picks a store-multiple write resource by predicate: consecutive R52LMAddrPred pairs share one R52WriteSTM<n>, from STM5 up to STM12.
695 def R52WriteSTM : SchedWriteVariant<[
696   SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
697   SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
698   SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
699   SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
700   SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
701   SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
702   SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
703   SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
704   SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
705   SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
706   SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
707   SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
708   SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
709   SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
710   SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
711   SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,  // R52WriteSTM13..15 are not referenced by this variant
712   // Unknown number of registers: fall back to R52WriteSTM6. NOTE(review): the original note said "resources for two", but STM6 is what Pred3/Pred4 select - confirm intent.
713   SchedVar<NoSchedPred,      [R52WriteSTM6]>
714 ]>;
715
716 // Vector loads/stores issue only in slot-0. Another instruction may be
717 // dual-issued in slot-1, but only alongside the last issue.
718 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5; }
719 def : WriteRes<WriteVLD2, [R52UnitLd]> {
720   let SingleIssue = 1;
721   let ResourceCycles = [2];
722   let NumMicroOps = 3;
723   let Latency = 6;
724 }
725 def : WriteRes<WriteVLD3, [R52UnitLd]> {
726   let SingleIssue = 1;
727   let ResourceCycles = [3];
728   let NumMicroOps = 5;
729   let Latency = 7;
730 }
731 def : WriteRes<WriteVLD4, [R52UnitLd]> {
732   let SingleIssue = 1;
733   let ResourceCycles = [4];
734   let NumMicroOps = 7;
735   let Latency = 8;
736 }
737 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {  // vector-store resources; each later step: +1 latency, +2 micro-ops, +1 cycle
738   let ResourceCycles = [1];
739   let NumMicroOps = 1;
740   let Latency = 5;
741 }
742 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
743   let ResourceCycles = [2];
744   let NumMicroOps = 3;
745   let Latency = 6;
746 }
747 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
748   let ResourceCycles = [3];
749   let NumMicroOps = 5;
750   let Latency = 7;
751 }
752 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
753   let ResourceCycles = [4];
754   let NumMicroOps = 7;
755   let Latency = 8;
756 }
757 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
758   let ResourceCycles = [5];
759   let NumMicroOps = 9;
760   let Latency = 9;
761 }
762
763 // VABA/VABAL: three R52Read_F1 operands. 64-bit element lists use R52WriteFPALU_F5; 128-bit lists and VABAL use R52Write2FPALU_F5.
764 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
765 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
766 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
767 // VABD/VABDL: two R52Read_F1 operands. VABDL is widening, so its result types are v8i16/v4i32/v2i64, the same set the VABAL entry lists.
768 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
769 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
770 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
771 // Integer VABS: only the 128-bit element lists are matched here.
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
773 // VADD/VSUB: 64-bit element lists use R52WriteFPALU_F4, 128-bit lists use R52Write2FPALU_F4; the *HN forms use R52Write2FPALU_F5.
774 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
775                                (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
776 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
777                                 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
778 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
779                                (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
780 // VADDL/VADDW/VSUBL/VSUBW share one entry; operands are read with R52Read_F1 rather than R52Read_F2.
781 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
782                                             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
783 // Bitwise ops, keyed on the d/q suffix of the opcode name.
784 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
785 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
786 // VBICi forms: a single read operand.
787 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
788 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
789 // VBIF/VBIT/VBSL: first read is R52Read_F1, the other two are R52Read_F2.
790 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
791 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
792 // Compare/count and conversion families, matched by opcode-name pattern only (no element-type split).
793 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
794       (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
795 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
796       (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
797 // VDUP reads with R52Read_ISS; VDUPLN reads with R52Read_F1.
798 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
799 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
800 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
801 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
802 // VEXT (d and q forms); VSEL shares the d-form entry.
803 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
804 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
805 // VFMA/VFMS with f or h element type use the FPMAC write resources.
806 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
807 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
808 // VHADD/VHSUB, split by 64-bit vs 128-bit element lists.
809 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
810 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
811 // Remaining entries: one InstRW per opcode family.
812 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
813 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
814 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
815 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
816 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
817 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
818 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
819 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
820 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
821 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
822                   (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
823 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
824                   (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
825 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
826 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
827 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
828                  (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
829 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
830 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
831
832 //---
833 // VSTx. Vector Stores
834 //---
835 // 1-element structure store: d -> VST1Mem, q -> VST2Mem, T -> VST3Mem, Q -> VST4Mem.
836 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
837 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
838 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
839 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
840 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
841 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
842 // VST1LN forms without writeback all use R52WriteVST1Mem.
843 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
844 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
845 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
846 // Writeback forms: same store resource as above, plus R52WriteAdr as a second write.
847 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
848 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
849 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
850 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
851 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
852 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
853 // VST1LN writeback forms.
854 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
855 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
856 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
857
858 // 2-element structure store: d/b forms -> VST2Mem, q forms and q pseudos -> VST4Mem.
859 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
860 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
861 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
862 // VST2LN forms without writeback all use R52WriteVST1Mem; the q patterns list only 16 and 32.
863 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
864 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
865 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
866 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
867 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
869 // Writeback forms: same store resource as above, plus R52WriteAdr as a second write.
870 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
871 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
872 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
873 // VST2LN writeback forms.
874 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
875 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
876 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
877 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
878 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
880
881 // 3-element structure store. Whole-structure forms use R52WriteVST4Mem; the pseudo pattern accepts d and q (incl. odd) names, as the _UPD pattern does.
882 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
883 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
884 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
885 // VST3LN forms without writeback all use R52WriteVST2Mem; the q patterns list only 16 and 32.
886 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
887 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
888 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
889 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
890 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
892 // Writeback forms: same store resource as above, plus R52WriteAdr as a second write.
893 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
894 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
895 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
896 // VST3LN writeback forms.
897 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
898 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
899 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
900 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
901 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
903
904 // 4-element structure store. Whole-structure forms use R52WriteVST5Mem; the pseudo pattern accepts d and q (incl. odd) names, as the _UPD pattern does.
905 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
906 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
907 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
908 // VST4LN forms without writeback all use R52WriteVST2Mem; the q patterns list only 16 and 32.
909 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
910 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
911 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
912 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
913 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
915 // Writeback forms: same store resource as above, plus R52WriteAdr as a second write.
916 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
917 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
918 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
919 // VST4LN writeback forms.
920 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
921 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
922 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
923 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
924 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
926
927 } // R52 SchedModel