]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - lib/Target/ARM/ARMScheduleR52.td
Vendor import of llvm trunk r338150:
[FreeBSD/FreeBSD.git] / lib / Target / ARM / ARMScheduleR52.td
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
22 def CortexR52Model : SchedMachineModel { // Bound to the defs below via `let SchedModel = CortexR52Model`
23   let MicroOpBufferSize = 0;  // No micro-op buffering: the R52 is an in-order processor
24   let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
25   let LoadLatency = 1;        // Optimistic, assuming no misses
26   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
27   let CompleteModel = 0;      // Partial model: covers only instructions applicable to Cortex-R52
28 }
29
30
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available.
33
34 // Each pipeline is modeled as a ProcResource with BufferSize = 0 because
35 // Cortex-R52 is an in-order processor. ProcResource<N> declares N units.
36
37 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU (two units)
38 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
39 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division (also reserved by the FP div/sqrt WriteRes below)
40 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
41 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
42 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU (two units)
43 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL (two units)
44 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV. NOTE(review): not referenced in the reviewed part of the file - confirm
45
46 // Cortex-R52 specific SchedReads; their ReadAdvance values are set inside the SchedModel block below
47 def R52Read_ISS   : SchedRead; // ReadAdvance 0
48 def R52Read_EX1   : SchedRead; // ReadAdvance 1
49 def R52Read_EX2   : SchedRead; // ReadAdvance 2
50 def R52Read_WRI   : SchedRead; // NOTE(review): no ReadAdvance for this one in the reviewed part of the file
51 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe (ReadAdvance 0)
52 def R52Read_F1    : SchedRead; // ReadAdvance 1
53 def R52Read_F2    : SchedRead; // ReadAdvance 2
54
55
56 //===----------------------------------------------------------------------===//
57 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
58
59 let SchedModel = CortexR52Model in {
60
61 // ALU - result is written in late EX2 whether or not a shift was required, so all four generic ALU writes share Latency 3
62 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
66
67 // Compares - occupy one ALU unit but are modeled with zero result latency
68 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
71
72 // Multiply - aliased to sub-target specific later
73
74 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2); modeled here with a fixed 8 cycles
75 def : WriteRes<WriteDIV, [R52UnitDiv]> {
76   let Latency = 8; let ResourceCycles = [8]; // non-pipelined: R52UnitDiv stays reserved for all 8 cycles
77 }
78
79 // Branches  - LR written in Late EX2 (the writes themselves are modeled with Latency 0)
80 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
82 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; } // uses an ALU unit, not the branch unit
83
84 // Misc
85 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } // no resource, no micro-op, no latency
86
87 // Integer pipeline by-passes (ReadAdvance<Read, N>: the operand is read N cycles after issue)
88 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
89 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
90 def : ReadAdvance<ReadMUL, 0>;   // No advance for multiply operands
91 def : ReadAdvance<ReadMAC, 0>;   // No advance for multiply-accumulate operands
92
93 // Floating-point. Map target-defined SchedReadWrites to subtarget resources and latencies
94 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
95
96 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> { // 64-bit form lists R52UnitFPMUL twice
97   let Latency = 6;
98 }
99
100 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
101   let Latency = 11;     // as it is internally two insns (MUL then ADD)
102 }
103
104 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
105                               R52UnitFPALU, R52UnitFPALU]> { // 64-bit form lists each FP unit twice
106   let Latency = 11;     // same latency as the 32-bit MAC
107 }
108
109 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> { // NOTE(review): reserves R52UnitDiv, not R52UnitFPDIV - confirm intent
110   let Latency = 7;          // FP div takes fixed #cycles
111   let ResourceCycles = [7]; // is not pipelined
112 }
113
114 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
115   let Latency = 17;          // fixed latency, as for the 32-bit form
116   let ResourceCycles = [17]; // unit reserved for the whole operation
117 }
118
119 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }  // NOTE(review): unlike FPDIV, no ResourceCycles given - confirm
120 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; } // NOTE(review): unlike FPDIV, no ResourceCycles given - confirm
121
122 // Overridden via InstRW for this processor; declared here with no resources.
123 def : WriteRes<WriteVST1, []>;
124 def : WriteRes<WriteVST2, []>;
125 def : WriteRes<WriteVST3, []>;
126 def : WriteRes<WriteVST4, []>;
127
128 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1 (advance of 1 cycle)
129 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1 (advance of 1 cycle)
130
131 //===----------------------------------------------------------------------===//
132 // Subtarget-specific SchedReadWrites.
133
134 // Forwarding information - based on when an operand is read; a later read gets a larger advance
135 def : ReadAdvance<R52Read_ISS, 0>;
136 def : ReadAdvance<R52Read_EX1, 1>;
137 def : ReadAdvance<R52Read_EX2, 2>;
138 def : ReadAdvance<R52Read_F0, 0>; // same advance as R52Read_ISS
139 def : ReadAdvance<R52Read_F1, 1>; // same advance as R52Read_EX1
140 def : ReadAdvance<R52Read_F2, 2>; // same advance as R52Read_EX2
141
142
143 // Cortex-R52 specific SchedWrites for use with InstRW and SchedAlias
144 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
145 def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> { // aliased below to the *64Hi multiply writes
146   let Latency = 4; let NumMicroOps = 0; // contributes no extra micro-op
147 }
148 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
149   let Latency = 8; let ResourceCycles = [8]; // not pipelined
150 }
151 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
152 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; } // stores share the load/store unit
153 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; } // no resource, zero latency
154 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; } // no resource, zero latency
155 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
156 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
157 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
158
159 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } // latency of ALU_EX2 without reserving a unit
160 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } // latency of ALU_WRI without reserving a unit
161
162 // Alias generic multiply, MAC, load and store writes to the sub-target specific SchedWrites
163 def : SchedAlias<WriteMUL16, R52WriteMAC>;
164 def : SchedAlias<WriteMUL32, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
166 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>; // Hi half: zero micro-op variant
167 def : SchedAlias<WriteMAC16, R52WriteMAC>;
168 def : SchedAlias<WriteMAC32, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
170 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>; // Hi half: zero micro-op variant
171 def : SchedAlias<WritePreLd, R52WriteLd>;
172 def : SchedAlias<WriteLd, R52WriteLd>;
173 def : SchedAlias<WriteST, R52WriteST>;
174
175 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } // "2" variants below list the unit twice
176 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
177   let Latency = 4;
178 }
179 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
180 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
181   let Latency = 5;
182 }
183 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
184 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
185   let Latency = 6;
186 }
187 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
188 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
189   let Latency = 6;
190 }
191 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
192   let Latency = 11;     // as it is internally two insns (MUL then ADD)
193 }
194 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
195                                          R52UnitFPALU, R52UnitFPALU]> {
196   let Latency = 11;     // same latency as the single-unit MAC
197 }
198
199 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } // FP load: load/store unit
200 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; } // FP store: load/store unit
201
202 //===----------------------------------------------------------------------===//
203 // Floating-point. Map target-defined SchedReadWrites to processor-specific ones
204 //
205 def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
206 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;   // moves use the shorter Latency-4 write
207 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
208 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>; // 64-bit ALU uses the same single-unit write as 32-bit
209
210 //===----------------------------------------------------------------------===//
211 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
212 //
213 def : InstRW<[WriteALU], (instrs COPY)>; // COPY is scheduled as a generic ALU write
214
215 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], // sign/zero extends
216       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
217       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
218
219 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], // immediate moves
220       (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
221 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], // `$` keeps this from matching MOV_ga_pcrel_ldr
222       (instregex "MOV_ga_pcrel$")>;
223 def : InstRW<[R52WriteLd,R52Read_ISS], // the _ldr pseudo is scheduled as a load
224       (instregex "MOV_ga_pcrel_ldr")>;
225
226 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
227
228 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS], // bit-field clear/insert/extract
229       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
230       "(t|t2)UBFX", "(t|t2)SBFX")>;
231
232 // Saturating arithmetic - uses the Latency-4 R52WriteALU_WRI write
233 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
234       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
235       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
236       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
237       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
238       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
239       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
240
241 // Parallel arithmetic - uses the Latency-3 R52WriteALU_EX2 write
242 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
243       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
244       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
245       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
246       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
247
248 // Flag setting. (Same write and reads as the parallel arithmetic group above.)
249 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
250       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
251       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
252       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
253       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
254       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
255       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
256
257 // Sum of Absolute Difference - three reads listed, all with ReadAdvance 0
258 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
259       (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8") >;
260
261 // Integer Multiply - single MAC unit, Latency 4, operands read with no advance
262 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
263       (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
264       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
265       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
266       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
267
268 // Multiply Accumulate
269 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
270 // The store pipeline is used partly for 64-bit operations (not modeled in the write below).
271 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
272       (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
273       "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
274       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
275       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
276       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
277       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
278       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
279       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
280       "SMLAL", "UMLAL", "SMLALBT",
281       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
282       "UMAAL", "t2SMLAL", "t2UMLAL",
283       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
284       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
285
286 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], // non-pipelined divide; only the t2 patterns are listed here
287       (instregex "t2SDIV", "t2UDIV")>;
288
289 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
290 // However, that's non-trivial to specify, so we keep it uniform (R52WriteLd, Latency 4).
291 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
292       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
293       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
294       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
295       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
296       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
297       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
298 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS], // pre/post-indexed forms: extra zero-latency R52WriteAdr
299       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
300       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
301       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
302       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
303       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
304       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
305
306 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
307 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>; // Latency-4 write, operand read with advance 2
308
309 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri", // register-immediate ALU forms
310       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
311       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
312       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
313
314 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr", // register-register ALU forms
315       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
316       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
317
318 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi", // rsi / t2 rs forms: second read has advance 0
319       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
320       "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;
321
322 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS], // rsr forms: two reads with advance 0
323       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
324       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
325
326 def : InstRW<[R52WriteALU_EX1], // Latency-2 write, no reads listed
327     (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;
328
329 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>; // shift/rotate by immediate
330 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS], // shift/rotate forms with two reads
331       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
332
333 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>; // compares: zero-latency, resource-free R52WriteCC
334 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
336 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
337
338 def : InstRW<[R52WriteALU_EX2, R52Read_ISS], // NOTE(review): "t2LDC" is grouped with the bit/byte-reverse ops - confirm
339       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
340
341 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
342
343 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>; // status-register reads use the load write
344 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>; // status-register writes use the load write
345
346 // Integer Load, Multiple. Helper writes and predicates used by R52WriteILDM.
347 foreach Lat = 3-25 in {
348   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> { // latency Lat, reserves the load/store unit
349     let Latency = Lat;
350   }
351   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> { // same latency, but no resource and no micro-op
352     let Latency = Lat;
353     let NumMicroOps = 0;
354   }
355 }
356 foreach NAddr = 1-16 in { // R52ILDMAddr<N>Pred is true when getNumLDMAddresses(*MI) == N
357   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
358 }
359 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; } // NOTE(review): no use seen in the reviewed part of the file
360 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>; // NOTE(review): no use seen in the reviewed part of the file
361 def R52WriteILDM : SchedWriteVariant<[ // One SchedVar per LDM address count N (2..16): N writes with latencies 4..N+3
362     SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
363
364     SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
365                                  R52WriteILDM6Cy]>,
366     SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
367                                  R52WriteILDM6Cy, R52WriteILDM7Cy]>,
368
369     SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
370                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
371                                  R52WriteILDM8Cy]>,
372     SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
373                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
374                                  R52WriteILDM8Cy, R52WriteILDM9Cy]>,
375
376     SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
377                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
378                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
379                                  R52WriteILDM10Cy]>,
380     SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
381                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
382                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
383                                  R52WriteILDM10Cy, R52WriteILDM11Cy]>,
384
385     SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
386                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
387                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
388                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
389                                  R52WriteILDM12Cy]>,
390     SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
391                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
392                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
393                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
394                                  R52WriteILDM12Cy, R52WriteILDM13Cy]>,
395
396     SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
397                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
398                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
399                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
400                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
401                                  R52WriteILDM14Cy]>,
402     SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
403                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
404                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
405                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
406                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
407                                  R52WriteILDM14Cy, R52WriteILDM15Cy]>,
408
409     SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
410                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
411                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
412                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
413                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
414                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
415                                  R52WriteILDM16Cy]>,
416     SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
417                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
418                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
419                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
420                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
421                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
422                                  R52WriteILDM16Cy, R52WriteILDM17Cy]>,
423
424     SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
425                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
426                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
427                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
428                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
429                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
430                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
431                                  R52WriteILDM18Cy]>,
432     SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy, // 16 addresses (was a second, unreachable Addr15Pred)
433                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
434                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
435                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
436                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
437                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
438                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
439                                  R52WriteILDM18Cy, R52WriteILDM19Cy]>,
440
441 // Unknown number of registers, just use resources for two registers: only the first two writes reserve R52UnitLd, the rest are resource-free *CyNo writes.
442     SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
443                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
444                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
445                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
446                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
447                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
448                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
449                                 R52WriteILDM18CyNo, R52WriteILDM19CyNo]>
450 ]> { let Variadic=1; }
451
452 // Integer Store, Multiple. Helper writes used by R52WriteISTM.
453 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> { // one store step on the load/store unit
454   let Latency = 4;
455   let NumMicroOps = 2;
456 }
457 foreach NumAddr = 1-16 in { // R52WriteISTM<N> repeats R52WriteIStIncAddr N times
458   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
459 }
460 def R52WriteISTM : SchedWriteVariant<[ // Picks R52WriteISTM<N> for N = 2..16 using the same predicates as R52WriteILDM
461     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
462     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
463     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
464     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
465     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
466     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
467     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
468     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
469     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
470     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
471     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
472     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
473     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
474     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
475     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
476     // Unknown number of registers, just use resources for two registers.
477     SchedVar<NoSchedPred,      [R52WriteISTM2]>
478 ]>;
479
480 def : InstRW<[R52WriteILDM, R52Read_ISS], // load-multiple without base update
481       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
482       "(t|sys)LDM(IA|DA|DB|IB)$")>;
483 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], // _UPD forms: extra zero-latency R52WriteAdr
484       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
485 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], // return/pop forms are scheduled like the _UPD forms
486         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
487
488 // Integer Store, Single Element - scheduled with R52WriteLd (not R52WriteST); second read has advance 2
489 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
490       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
491       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
492       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
493
494 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], // pre/post-indexed stores: extra R52WriteAdr
495       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
496       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
497       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
498       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
499
500 // Integer Store, Dual (the first pattern list also includes the STL / t2STL prefixes)
501 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
502     (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
503 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
504     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
505
506 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2], // store-multiple without base update
507     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
508 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2], // _UPD forms and tPUSH: extra R52WriteAdr
509     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
510     "tPUSH")>;
511
512 // LDRLIT pseudo instructions, they expand to LDR + PICADD; modeled as a single R52WriteLd
513 def : InstRW<[R52WriteLd],
514       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
515 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR; also modeled as a single R52WriteLd
516 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
517
518
519
520 //===----------------------------------------------------------------------===//
521 // VFP, Floating Point Support. q-register patterns use the Write2* variants, which list the FP unit twice.
522 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
523 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
524
525 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
526 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
527 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
528
529 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>; // Latency-4 write
530 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
531
532 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
533 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
534
535 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>; // FP load on the load/store unit, Latency 5
536 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>; // FP store on the load/store unit, Latency 5
537
538
539 //===----------------------------------------------------------------------===//
540 // Neon Support
541
542 // vector multiple load stores: helper predicates and writes used by R52WriteVLDM / R52WriteSTM
543 foreach NumAddr = 1-16 in { // R52LMAddrPred<N> is true when the instruction has exactly N operands
544   def R52LMAddrPred#NumAddr :
545     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
546 }
547 foreach Lat = 1-32 in { // R52WriteLM<Lat>Cy: latency only, no resource
548   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
549     let Latency = Lat;
550   }
551 }
552 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
553   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> { // zero latency; Num micro-ops and Num cycles on R52UnitLd
554     let Latency = 0;
555     let NumMicroOps = Num;
556     let ResourceCycles = [Num];
557   }
558 }
559 def R52WriteVLDM : SchedWriteVariant<[ // Operand counts 2k-1 and 2k share the k-D-reg writes; the first matching SchedVar wins
560   // 1 D reg
561   SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
562                               R52ReserveLd5Cy]>,
563   SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
564                               R52ReserveLd5Cy]>,
565
566   // 2 D reg
567   SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
568                               R52ReserveLd6Cy]>,
569   SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
570                               R52ReserveLd6Cy]>,
571
572   // 3 D reg (both variants reserve 7 cycles; Pred5 previously reserved 4)
573   SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
574                               R52WriteLM7Cy,
575                               R52ReserveLd7Cy]>,
576   SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
577                               R52WriteLM7Cy,
578                               R52ReserveLd7Cy]>,
579
580   // 4 D reg
581   SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
582                               R52WriteLM7Cy, R52WriteLM8Cy,
583                               R52ReserveLd8Cy]>,
584   SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
585                               R52WriteLM7Cy, R52WriteLM8Cy,
586                               R52ReserveLd8Cy]>,
587
588   // 5 D reg
589   SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
590                               R52WriteLM7Cy, R52WriteLM8Cy,
591                               R52WriteLM9Cy,
592                               R52ReserveLd9Cy]>,
593   SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
594                               R52WriteLM7Cy, R52WriteLM8Cy,
595                               R52WriteLM9Cy,
596                               R52ReserveLd9Cy]>,
597
598   // 6 D reg
599   SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
600                               R52WriteLM7Cy, R52WriteLM8Cy,
601                               R52WriteLM9Cy, R52WriteLM10Cy,
602                               R52ReserveLd10Cy]>,
603   SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
604                               R52WriteLM7Cy, R52WriteLM8Cy,
605                               R52WriteLM9Cy, R52WriteLM10Cy,
606                               R52ReserveLd10Cy]>,
607
608   // 7 D reg
609   SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
610                               R52WriteLM7Cy, R52WriteLM8Cy,
611                               R52WriteLM9Cy, R52WriteLM10Cy,
612                               R52WriteLM11Cy,
613                               R52ReserveLd11Cy]>,
614   SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
615                               R52WriteLM7Cy, R52WriteLM8Cy,
616                               R52WriteLM9Cy, R52WriteLM10Cy,
617                               R52WriteLM11Cy,
618                               R52ReserveLd11Cy]>,
619
620   // 8 D reg (predicates 15 and 16; previously a duplicate 14 followed by 15)
621   SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
622                               R52WriteLM7Cy, R52WriteLM8Cy,
623                               R52WriteLM9Cy, R52WriteLM10Cy,
624                               R52WriteLM11Cy, R52WriteLM12Cy,
625                               R52ReserveLd12Cy]>,
626   SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
627                               R52WriteLM7Cy, R52WriteLM8Cy,
628                               R52WriteLM9Cy, R52WriteLM10Cy,
629                               R52WriteLM11Cy, R52WriteLM12Cy,
630                               R52ReserveLd12Cy]>,
631   // unknown number of reg: latencies for up to 8 results, but reserve the unit as for 1 D reg.
632   SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
633                               R52WriteLM7Cy, R52WriteLM8Cy,
634                               R52WriteLM9Cy, R52WriteLM10Cy,
635                               R52WriteLM11Cy, R52WriteLM12Cy,
636                               R52ReserveLd5Cy]>
637 ]> { let Variadic=1;}
638
639 // Variable-length (multi-register) stores. Cannot dual-issue.
// Each R52WriteSTM<n> record below has Latency = n, holds R52UnitLd for
// (n - 4) cycles and counts as 2 * (n - 4) micro-ops.
// NOTE(review): none of these records sets SingleIssue (the WriteVLD2..4
// records further down do) -- confirm that is intended given "cannot
// dual-issue".
// NOTE(review): only R52WriteSTM5..R52WriteSTM12 are selected by the
// R52WriteSTM variant that follows; check whether STM13..STM15 are used
// anywhere else.
640 def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
641   let Latency = 5;
642   let NumMicroOps = 2;
643   let ResourceCycles = [1];
644 }
645 def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
646   let Latency = 6;
647   let NumMicroOps = 4;
648   let ResourceCycles = [2];
649 }
650 def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
651   let Latency = 7;
652   let NumMicroOps = 6;
653   let ResourceCycles = [3];
654 }
655 def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
656   let Latency = 8;
657   let NumMicroOps = 8;
658   let ResourceCycles = [4];
659 }
660 def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
661   let Latency = 9;
662   let NumMicroOps = 10;
663   let ResourceCycles = [5];
664 }
665 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
666   let Latency = 10;
667   let NumMicroOps = 12;
668   let ResourceCycles = [6];
669 }
670 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
671   let Latency = 11;
672   let NumMicroOps = 14;
673   let ResourceCycles = [7];
674 }
675 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
676   let Latency = 12;
677   let NumMicroOps = 16;
678   let ResourceCycles = [8];
679 }
680 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
681   let Latency = 13;
682   let NumMicroOps = 18;
683   let ResourceCycles = [9];
684 }
685 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
686   let Latency = 14;
687   let NumMicroOps = 20;
688   let ResourceCycles = [10];
689 }
690 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
691   let Latency = 15;
692   let NumMicroOps = 22;
693   let ResourceCycles = [11];
694 }
695
// Store-multiple write type resolved per instruction: the R52LMAddrPred<k>
// predicates are listed in order and consecutive pairs share one resource
// (Pred1/2 -> STM5, Pred3/4 -> STM6, ... Pred15/16 -> STM12).
// NOTE(review): the predicates are defined outside this chunk; presumably
// each one tests the length of the register list -- confirm.
696 def R52WriteSTM : SchedWriteVariant<[
697   SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
698   SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
699   SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
700   SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
701   SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
702   SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
703   SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
704   SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
705   SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
706   SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
707   SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
708   SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
709   SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
710   SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
711   SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
712   SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
713   // Unknown number of registers: fall back to R52WriteSTM6, i.e. the
  // resource shared by Pred3/Pred4 (two ResourceCycles on R52UnitLd).
714   SchedVar<NoSchedPred,      [R52WriteSTM6]>
715 ]>;
716
717 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
718 // another instruction in slot-1, but only in the last issue.
// WriteVLD<k> has Latency = k + 4. For k >= 2 the record holds R52UnitLd
// for k cycles, counts as 2k - 1 micro-ops and sets SingleIssue; WriteVLD1
// overrides only the latency.
719 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
720 def : WriteRes<WriteVLD2, [R52UnitLd]> {
721   let Latency = 6;
722   let NumMicroOps = 3;
723   let ResourceCycles = [2];
724   let SingleIssue = 1;
725 }
726 def : WriteRes<WriteVLD3, [R52UnitLd]> {
727   let Latency = 7;
728   let NumMicroOps = 5;
729   let ResourceCycles = [3];
730   let SingleIssue = 1;
731 }
732 def : WriteRes<WriteVLD4, [R52UnitLd]> {
733   let Latency = 8;
734   let NumMicroOps = 7;
735   let ResourceCycles = [4];
736   let SingleIssue = 1;
737 }
// Write resources used by the VSTx InstRW entries later in this file.
// R52WriteVST<k>Mem has Latency = k + 4, holds R52UnitLd for k cycles and
// counts as 2k - 1 micro-ops. Unlike WriteVLD2..4, none sets SingleIssue.
738 def R52WriteVST1Mem  : SchedWriteRes<[R52UnitLd]> {
739   let Latency = 5;
740   let NumMicroOps = 1;
741   let ResourceCycles = [1];
742 }
743 def R52WriteVST2Mem  : SchedWriteRes<[R52UnitLd]> {
744   let Latency = 6;
745   let NumMicroOps = 3;
746   let ResourceCycles = [2];
747 }
748 def R52WriteVST3Mem  : SchedWriteRes<[R52UnitLd]> {
749   let Latency = 7;
750   let NumMicroOps = 5;
751   let ResourceCycles = [3];
752 }
753 def R52WriteVST4Mem  : SchedWriteRes<[R52UnitLd]> {
754   let Latency = 8;
755   let NumMicroOps = 7;
756   let ResourceCycles = [4];
757 }
758 def R52WriteVST5Mem  : SchedWriteRes<[R52UnitLd]> {
759   let Latency = 9;
760   let NumMicroOps = 9;
761   let ResourceCycles = [5];
762 }
763
764
// Absolute difference and accumulate. Each InstRW binds the opcodes matched
// by instregex to one R52Write* type followed by R52Read* types.
// 64-bit vector types use R52WriteFPALU_F5; 128-bit types and the widening
// VABAL form (long result types v8i16|v4i32|v2i64) use R52Write2FPALU_F5.
765 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
766 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
767 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
768
// Absolute difference. 64-bit vector types use R52WriteFPALU_F4; 128-bit
// types and the widening VABDL form use R52Write2FPALU_F4.
// VABDL opcodes are suffixed with the long result type, so the list is
// (v8i16|v4i32|v2i64), the same set used for VABAL.
769 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
770 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
771 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
772
// Integer abs, add/sub and bitwise operations. In each pair the 64-bit
// ("d" or v8i8|v4i16|v2i32|v1i64) opcodes use R52WriteFPALU_* and the
// 128-bit ("q" or v16i8|v8i16|v4i32|v2i64) opcodes use R52Write2FPALU_*.
// Only the 128-bit VABS types are listed here.
773 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
774
775 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
776                                (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
777 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
778                                 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
// Narrowing high-half add/sub (suffix is the narrow result type).
779 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
780                                (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
781
// Long/wide add and sub: matched by opcode prefix only, all element types.
782 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
783                                             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
784
785 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
786 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
787
// VBIC with immediate: a single register read.
788 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
789 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
790
// Bitwise insert/select: three reads, the first typed R52Read_F1 and the
// other two R52Read_F2.
791 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
792 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
793
// Compares, count ops and conversions, matched by opcode prefix.
// The "VCMPE" alternative is already covered by the "VCMP" prefix.
794 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
795       (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
796 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
797       (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
798
// Duplicate: VDUP reads with R52Read_ISS, the lane form VDUPLN with
// R52Read_F1. "d" opcodes use R52WriteFPALU_F3, "q" use R52Write2FPALU_F3.
799 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
800 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
801 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
802 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
803
// Extract; VSEL shares the "d" entry. Only the "q" form lists a 64 variant.
804 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
805 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
806
// Vector fused multiply-accumulate (f/h element types): FPMAC write, 3 reads.
807 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
808 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
809
// Halving add/sub.
810 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
811 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
812
// Load-multiple without writeback ("$" anchors the end of the opcode name)
// uses the R52WriteVLDM write type.
813 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
// Remaining entries are mostly matched by opcode prefix, so each covers
// every element type and register width sharing that prefix.
814 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
815 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
816 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
817 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
818 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
819 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
// Saturating ops: explicit 64-bit / 128-bit type lists select the
// R52WriteFPALU_F5 / R52Write2FPALU_F5 variants.
820 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
821 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
822 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
823                   (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
824 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
825                   (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
826 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
// NOTE(review): three read types are listed for the saturating multiplies
// below -- confirm that matches their operand count.
827 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
828 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
829                  (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
// The "VRSHRN" pattern is already covered by the "VRSHR" prefix.
830 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
831 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
832
833 //---
834 // VSTx. Vector Stores
835 //---
836 // 1-element structure store
// Write type by opcode form: VST1d -> R52WriteVST1Mem, VST1q -> VST2Mem,
// ...T (and d64TPseudo) -> VST3Mem, ...Q (and d64QPseudo) -> VST4Mem.
// All entries read with R52Read_ISS and R52Read_F2.
837 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
838 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
839 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
840 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
841 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
842 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
843
// Single-lane stores always use R52WriteVST1Mem.
844 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
845 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
846 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
847
// Writeback forms: same memory write type plus R52WriteAdr as a second
// write. These patterns have no trailing "$", so they match by prefix.
848 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
849 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
850 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
851 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
852 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
853 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
854
855 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
856 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
857 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
858
859 // 2-element structure store
// VST2d/VST2b -> R52WriteVST2Mem; VST2q (and its Pseudo) -> R52WriteVST4Mem.
860 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
861 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
862 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
863
// Single-lane forms use R52WriteVST1Mem; the "q" lane forms list only 16
// and 32.
864 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
865 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
866 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
867 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
869 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
870
// Writeback forms: same memory write type plus R52WriteAdr as a second
// write. These patterns have no trailing "$", so they match by prefix.
871 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
872 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
873 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
874
875 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
876 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
877 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
878 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
880 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
881
882 // 3-element structure store
// Whole-structure forms use R52WriteVST4Mem.
// NOTE(review): the non-writeback pseudo pattern below covers only "d"
// opcodes, while the writeback pseudo pattern covers (d|q) -- confirm the
// "q" non-writeback pseudos are intentionally left out.
883 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
884 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
885 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
886
// Single-lane forms use R52WriteVST2Mem; the "q" lane forms list only 16
// and 32.
887 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
888 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
889 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
890 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
892 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
893
// Writeback forms: same memory write type plus R52WriteAdr as a second write.
894 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
895 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
896 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
897
898 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
899 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
900 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
901 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
903 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
904
905 // 4-element structure store
// Whole-structure forms use R52WriteVST5Mem.
// NOTE(review): the non-writeback pseudo pattern below covers only
// "VST4d...Pseudo", while the writeback pseudo pattern covers (d|q) and
// (oddP|P)seudo -- confirm the difference is intentional.
906 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
907 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
908 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
909
// Single-lane forms use R52WriteVST2Mem; the "q" lane forms list only 16
// and 32.
910 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
911 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
912 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
913 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
915 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
916
// Writeback forms: same memory write type plus R52WriteAdr as a second
// write. These patterns have no trailing "$", so they match by prefix.
917 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
918 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
919 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
920
921 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
922 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
923 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
924 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
926 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
927
928 } // R52 SchedModel