]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
Merge ^/head r312201 through r312206.
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / ARM / ARMScheduleR52.td
1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
11 //
12 //===----------------------------------------------------------------------===//
13
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16 // an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL  and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
21
22 def CortexR52Model : SchedMachineModel {
23   let MicroOpBufferSize = 0;  // 0 because the R52 is an in-order processor
24   let IssueWidth = 2;         // At most 2 micro-ops dispatched per cycle
25   let LoadLatency = 1;        // Optimistic, assuming no cache misses
26   let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
27   let PostRAScheduler = 1;    // Enable the PostRA scheduler pass.
28   let CompleteModel = 0;      // Partial model: covers only instructions applicable to Cortex-R52.
29 }
30
31
32 //===----------------------------------------------------------------------===//
33 // Define each kind of processor resource and number available.
34
35 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36 // Cortex-R52 is an in-order processor.
37
38 def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Integer ALU (2 units)
39 def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Integer multiply/MAC
40 def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Integer division (non-pipelined)
41 def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
42 def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
43 def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU (2 units)
44 def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL (2 units)
45 def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV
46
47 // Cortex-R52 specific SchedReads
48 def R52Read_ISS   : SchedRead;
49 def R52Read_EX1   : SchedRead;
50 def R52Read_EX2   : SchedRead;
51 def R52Read_WRI   : SchedRead;
52 def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
53 def R52Read_F1    : SchedRead;
54 def R52Read_F2    : SchedRead;
55
56
57 //===----------------------------------------------------------------------===//
58 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
59
60 let SchedModel = CortexR52Model in {
61
62 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
63 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
66 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
67
68 // Compares
69 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
71 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
72
73 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
74 def : WriteRes<WriteDiv, [R52UnitDiv]> {
75   let Latency = 8; let ResourceCycles = [8]; // not pipelined
76 }
77
78 // Loads
79 def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
80 def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
81
82 // Branches  - LR written in Late EX2
83 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
84 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
85 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
86
87 // Misc
88 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
89 def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
90
91 def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
92 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
93
94
95 //===----------------------------------------------------------------------===//
96 // Subtarget-specific SchedReadWrites.
97
98 // Forwarding information - based on when an operand is read
99 def : ReadAdvance<R52Read_ISS, 0>;
100 def : ReadAdvance<R52Read_EX1, 1>;
101 def : ReadAdvance<R52Read_EX2, 2>;
102 def : ReadAdvance<R52Read_F0, 0>;
103 def : ReadAdvance<R52Read_F1, 1>;
104 def : ReadAdvance<R52Read_F2, 2>;
105
106
107 // Cortex-R52 specific SchedWrites for use with InstRW
108 def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
109 def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
110   let Latency = 8; let ResourceCycles = [8]; // not pipelined
111 }
112 def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
113 def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
114 def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; }
115 def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; }
116 def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
117 def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
118 def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
119
120 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
121 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
122
123 def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
124 def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
125   let Latency = 4;
126 }
127 def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
128 def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
129   let Latency = 5;
130 }
131 def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
132 def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
133   let Latency = 6;
134 }
135 def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
136 def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
137   let Latency = 6;
138 }
139 def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
140   let Latency = 11;     // as it is internally two insns (MUL then ADD)
141 }
142 def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
143                                          R52UnitFPALU, R52UnitFPALU]> {
144   let Latency = 11;
145 }
146
147 def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
148 def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
149
150 def R52WriteFPDIV_SP   : SchedWriteRes<[R52UnitFPDIV]> {
151   let Latency = 7;          // FP div takes fixed #cycles
152   let ResourceCycles = [7]; // is not pipelined
153  }
154 def R52WriteFPDIV_DP   : SchedWriteRes<[R52UnitFPDIV]> {
155   let Latency = 17;
156   let ResourceCycles = [17];
157 }
158
159
160 //===----------------------------------------------------------------------===//
161 // Subtarget-specific - map operands to SchedReadWrites
162
163 def : InstRW<[WriteALU], (instrs COPY)>;
164
165 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
166       (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
167       "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
168
169 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
170       (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
171       "t2MOVi", "t2MOV_ga_dyn")>;
172 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
173       (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
174 def : InstRW<[R52WriteLd,R52Read_ISS],
175       (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
176
177 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
178
179 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
180       (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
181       "(t|t2)UBFX", "(t|t2)SBFX")>;
182
183 // Saturating arithmetic
184 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
185       (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
186       "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
187       "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
188       "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
189       "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
190       "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
191
192 // Parallel arithmetic
193 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
194       (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
195       "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
196       "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
197       "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
198
199 // Flag setting.
200 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
201       (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
202       "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
203       "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
204       "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
205       "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
206       "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
207
208 // Sum of Absolute Difference
209 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
210       (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
211
212 // Integer Multiply
213 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
214       (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
215       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
216       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
217       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
218
219 // Multiply Accumulate: one R52WriteMAC result, all three operands read in ISS.
220 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
221 // The store pipeline is used partly for 64-bit operations.
222 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
223       (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
224       "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
225       "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
226       "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
227       "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
228       "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
229       "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
230       "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
231       "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
232       "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
233       "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
234       "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
235       "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
236
237 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
238       (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
239
240 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
241 // However, that's non-trivial to specify, so we keep it uniform
242 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
243       (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
244       "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
245       "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
246       "LDRH$",  "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
247       "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
248       "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
249 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
250       (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
251       "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
252       "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
253       "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
254       "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
255       "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
256
257 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
258 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
259
260 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
261       "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
262       "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
263       "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
264
265 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr", // register-register ALU forms
266       "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
267       "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
268
269 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi", // shifted-by-immediate ALU forms
270       "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi", // NOTE(review): confirm the t2 opcode names below really end in "rsi"
271       "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
272
273 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS], // shifted-by-register ALU forms
274       (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
275       "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
276
277 def : InstRW<[R52WriteALU_EX1],
278     (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
279
280 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
281 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
282       (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
283
284 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
285 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
286 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
287 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
288
289 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
290       (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
291
292 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
293
294 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
295 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
296
297 //def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
298 //def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
299 //def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
300
301 //def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
302 //def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
303 //def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
304
305
306 // Integer Load, Multiple.
307 foreach Lat = 3-25 in {
308   def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
309     let Latency = Lat;
310   }
311   def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
312     let Latency = Lat;
313     let NumMicroOps = 0;
314   }
315 }
316 foreach NAddr = 1-16 in {
317   def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
318 }
319 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
320 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
321 def R52WriteILDM : SchedWriteVariant<[ // integer LDM; variant chosen by TII->getNumLDMAddresses(*MI)
322     SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
323     // Each list holds one write per address: latency 4 for the first, +1 for each further one.
324     SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
325                                  R52WriteILDM6Cy]>,
326     SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
327                                  R52WriteILDM6Cy, R52WriteILDM7Cy]>,
328
329     SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
330                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
331                                  R52WriteILDM8Cy]>,
332     SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
333                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
334                                  R52WriteILDM8Cy, R52WriteILDM9Cy]>,
335
336     SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
337                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
338                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
339                                  R52WriteILDM10Cy]>,
340     SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
341                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
342                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
343                                  R52WriteILDM10Cy, R52WriteILDM11Cy]>,
344
345     SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
346                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
347                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
348                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
349                                  R52WriteILDM12Cy]>,
350     SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
351                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
352                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
353                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
354                                  R52WriteILDM12Cy, R52WriteILDM13Cy]>,
355
356     SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
357                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
358                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
359                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
360                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
361                                  R52WriteILDM14Cy]>,
362     SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
363                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
364                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
365                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
366                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
367                                  R52WriteILDM14Cy, R52WriteILDM15Cy]>,
368
369     SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
370                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
371                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
372                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
373                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
374                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
375                                  R52WriteILDM16Cy]>,
376     SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
377                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
378                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
379                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
380                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
381                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
382                                  R52WriteILDM16Cy, R52WriteILDM17Cy]>,
383
384     SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
385                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
386                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
387                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
388                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
389                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
390                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
391                                  R52WriteILDM18Cy]>,
392     SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
393                                  R52WriteILDM6Cy, R52WriteILDM7Cy,
394                                  R52WriteILDM8Cy, R52WriteILDM9Cy,
395                                  R52WriteILDM10Cy, R52WriteILDM11Cy,
396                                  R52WriteILDM12Cy, R52WriteILDM13Cy,
397                                  R52WriteILDM14Cy, R52WriteILDM15Cy,
398                                  R52WriteILDM16Cy, R52WriteILDM17Cy,
399                                  R52WriteILDM18Cy, R52WriteILDM19Cy]>,
400 // Unknown number of registers, just use resources for two registers.
401 // NOTE(review): the last two entries (18Cy/19Cy) also reserve R52UnitLd, unlike the CyNo ones; confirm intended.
402     SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
403                                 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
404                                 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
405                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
406                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
407                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
408                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
409                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>
410 ]> { let Variadic=1; }
411
412 // Integer Store, Multiple
413 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
414   let Latency = 4;
415   let NumMicroOps = 2;
416 }
417 foreach NumAddr = 1-16 in {
418   def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
419 }
420 def R52WriteISTM : SchedWriteVariant<[ // integer STM; picks R52WriteISTM<N> via the R52ILDMAddr<N>Pred address-count predicates
421     SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
422     SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
423     SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
424     SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
425     SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
426     SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
427     SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
428     SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
429     SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
430     SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
431     SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
432     SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
433     SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
434     SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
435     SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
436     // Unknown number of registers, just use resources for two registers.
437     SchedVar<NoSchedPred,      [R52WriteISTM2]>
438 ]>;
439
440 def : InstRW<[R52WriteILDM, R52Read_ISS],
441       (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
442       "(t|sys)LDM(IA|DA|DB|IB)$")>;
443 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
444       (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
445 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
446         (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
447
448 // Integer Store, Single Element
449 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
450       (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
451       "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
452       "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
453
454 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
455       (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
456       "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
457       "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
458       "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
459
460 // Integer Store, Dual
461 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
462     (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
463 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
464     (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
465
466 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
467     (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
468 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
469     (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
470     "PUSH", "tPUSH")>;
471
472 // LDRLIT pseudo instructions, they expand to LDR + PICADD
473 def : InstRW<[R52WriteLd],
474       (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
475 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
476 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
477
478
479
480 //===----------------------------------------------------------------------===//
481 // VFP, Floating Point Support
482 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
483 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
484
485 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
486 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
487 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
488
489 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
490 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
491
492 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
493 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
494
495 def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
496 def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
497
498 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
499                                           (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
500
501 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
502 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
503
504
505 //===----------------------------------------------------------------------===//
506 // Neon Support
507
508 // vector multiple load stores
509 foreach NumAddr = 1-16 in {
510   def R52LMAddrPred#NumAddr :
511     SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
512 }
513 foreach Lat = 1-32 in {
514   def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
515     let Latency = Lat;
516   }
517 }
518 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
519   def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
520     let Latency = 0;
521     let NumMicroOps = Num;
522     let ResourceCycles = [Num];
523   }
524 }
525 def R52WriteVLDM : SchedWriteVariant<[ // vector load-multiple; variant chosen by MI->getNumOperands() (R52LMAddrPred<N>)
526   // 1 D reg
527   SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
528                               R52ReserveLd5Cy]>,
529   SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
530                               R52ReserveLd5Cy]>,
531   // Each variant ends with an R52ReserveLd<N>Cy whose N equals its last write latency.
532   // 2 D reg
533   SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
534                               R52ReserveLd6Cy]>,
535   SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
536                               R52ReserveLd6Cy]>,
537
538   // 3 D reg
539   SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
540                               R52WriteLM7Cy,
541                               R52ReserveLd7Cy]>,
542   SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
543                               R52WriteLM7Cy,
544                               R52ReserveLd7Cy]>,
545
546   // 4 D reg
547   SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
548                               R52WriteLM7Cy, R52WriteLM8Cy,
549                               R52ReserveLd8Cy]>,
550   SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
551                               R52WriteLM7Cy, R52WriteLM8Cy,
552                               R52ReserveLd8Cy]>,
553
554   // 5 D reg
555   SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
556                               R52WriteLM7Cy, R52WriteLM8Cy,
557                               R52WriteLM9Cy,
558                               R52ReserveLd9Cy]>,
559   SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
560                               R52WriteLM7Cy, R52WriteLM8Cy,
561                               R52WriteLM9Cy,
562                               R52ReserveLd9Cy]>,
563
564   // 6 D reg
565   SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
566                               R52WriteLM7Cy, R52WriteLM8Cy,
567                               R52WriteLM9Cy, R52WriteLM10Cy,
568                               R52ReserveLd10Cy]>,
569   SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
570                               R52WriteLM7Cy, R52WriteLM8Cy,
571                               R52WriteLM9Cy, R52WriteLM10Cy,
572                               R52ReserveLd10Cy]>,
573
574   // 7 D reg
575   SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
576                               R52WriteLM7Cy, R52WriteLM8Cy,
577                               R52WriteLM9Cy, R52WriteLM10Cy,
578                               R52WriteLM11Cy,
579                               R52ReserveLd11Cy]>,
580   SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
581                               R52WriteLM7Cy, R52WriteLM8Cy,
582                               R52WriteLM9Cy, R52WriteLM10Cy,
583                               R52WriteLM11Cy,
584                               R52ReserveLd11Cy]>,
585
586   // 8 D reg
587   SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
588                               R52WriteLM7Cy, R52WriteLM8Cy,
589                               R52WriteLM9Cy, R52WriteLM10Cy,
590                               R52WriteLM11Cy, R52WriteLM12Cy,
591                               R52ReserveLd12Cy]>,
592   SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
593                               R52WriteLM7Cy, R52WriteLM8Cy,
594                               R52WriteLM9Cy, R52WriteLM10Cy,
595                               R52WriteLM11Cy, R52WriteLM12Cy,
596                               R52ReserveLd12Cy]>,
597   // unknown number of reg.
598   SchedVar<NoSchedPred,      [R52WriteLM5Cy, R52WriteLM6Cy,
599                               R52WriteLM7Cy, R52WriteLM8Cy,
600                               R52WriteLM9Cy, R52WriteLM10Cy,
601                               R52WriteLM11Cy, R52WriteLM12Cy,
602                               R52ReserveLd5Cy]>
603 ]> { let Variadic=1;}
604
605 // variable stores. Cannot dual-issue
    // Eleven write resources, all bound to R52UnitLd. From each def to the
    // next, Latency rises by 1, NumMicroOps by 2 and ResourceCycles by 1.
    // The R52WriteSTM variant that follows these defs refers to R52WriteSTM5
    // through R52WriteSTM12 only.
606 def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
607   let Latency = 5;
608   let NumMicroOps = 2;
609   let ResourceCycles = [1];
610 }
611 def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
612   let Latency = 6;
613   let NumMicroOps = 4;
614   let ResourceCycles = [2];
615 }
616 def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
617   let Latency = 7;
618   let NumMicroOps = 6;
619   let ResourceCycles = [3];
620 }
621 def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
622   let Latency = 8;
623   let NumMicroOps = 8;
624   let ResourceCycles = [4];
625 }
626 def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
627   let Latency = 9;
628   let NumMicroOps = 10;
629   let ResourceCycles = [5];
630 }
631 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
632   let Latency = 10;
633   let NumMicroOps = 12;
634   let ResourceCycles = [6];
635 }
636 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
637   let Latency = 11;
638   let NumMicroOps = 14;
639   let ResourceCycles = [7];
640 }
641 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
642   let Latency = 12;
643   let NumMicroOps = 16;
644   let ResourceCycles = [8];
645 }
646 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
647   let Latency = 13;
648   let NumMicroOps = 18;
649   let ResourceCycles = [9];
650 }
651 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
652   let Latency = 14;
653   let NumMicroOps = 20;
654   let ResourceCycles = [10];
655 }
656 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
657   let Latency = 15;
658   let NumMicroOps = 22;
659   let ResourceCycles = [11];
660 }
661
    // Variant write for variable stores. Each SchedVar pairs a predicate with
    // the write to use when that predicate matches. The predicates are listed
    // in pairs (1-2, 3-4, ... 15-16); each pair shares one write, running from
    // R52WriteSTM5 up to R52WriteSTM12.
    // NOTE(review): the R52LMAddrPred* predicates are defined outside this
    // section; presumably they test the number of registers transferred --
    // confirm against their definitions.
662 def R52WriteSTM : SchedWriteVariant<[
663   SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
664   SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
665   SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
666   SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
667   SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
668   SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
669   SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
670   SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
671   SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
672   SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
673   SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
674   SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
675   SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
676   SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
677   SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
678   SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
679   // unknown number of registers, just use resources for two
      // (NoSchedPred is the fallback entry; R52WriteSTM6 is the write whose
      // ResourceCycles is [2].)
680   SchedVar<NoSchedPred,      [R52WriteSTM6]>
681 ]>;
682
683 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
684 // another instruction in slot-1, but only in the last issue.
    // All of these writes are bound to R52UnitLd. For R52WriteVLDnMem and
    // R52WriteVSTnMem, Latency is n + 4, and wherever they are spelled out
    // NumMicroOps is 2n - 1 and ResourceCycles is [n].
    // R52WriteVLD1Mem sets only Latency and leaves the other two fields at
    // whatever SchedWriteRes defaults to; R52WriteVST1Mem writes 1 and [1]
    // explicitly.
685 def R52WriteVLD1Mem  : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
686 def R52WriteVLD2Mem  : SchedWriteRes<[R52UnitLd]> {
687   let Latency = 6;
688   let NumMicroOps = 3;
689   let ResourceCycles = [2];
690 }
691 def R52WriteVLD3Mem  : SchedWriteRes<[R52UnitLd]> {
692   let Latency = 7;
693   let NumMicroOps = 5;
694   let ResourceCycles = [3];
695 }
696 def R52WriteVLD4Mem  : SchedWriteRes<[R52UnitLd]> {
697   let Latency = 8;
698   let NumMicroOps = 7;
699   let ResourceCycles = [4];
700 }
701 def R52WriteVST1Mem  : SchedWriteRes<[R52UnitLd]> {
702   let Latency = 5;
703   let NumMicroOps = 1;
704   let ResourceCycles = [1];
705 }
706 def R52WriteVST2Mem  : SchedWriteRes<[R52UnitLd]> {
707   let Latency = 6;
708   let NumMicroOps = 3;
709   let ResourceCycles = [2];
710 }
711 def R52WriteVST3Mem  : SchedWriteRes<[R52UnitLd]> {
712   let Latency = 7;
713   let NumMicroOps = 5;
714   let ResourceCycles = [3];
715 }
716 def R52WriteVST4Mem  : SchedWriteRes<[R52UnitLd]> {
717   let Latency = 8;
718   let NumMicroOps = 7;
719   let ResourceCycles = [4];
720 }
721 def R52WriteVST5Mem  : SchedWriteRes<[R52UnitLd]> {
722   let Latency = 9;
723   let NumMicroOps = 9;
724   let ResourceCycles = [5];
725 }
726
727
    // Each InstRW assigns the bracketed list of scheduling writes/reads to the
    // instructions whose names match the instregex patterns.
    // VABA / VABAL: the 64-bit vector types (v8i8, v4i16, v2i32) use
    // R52WriteFPALU_F5; the 128-bit VABA types and all VABAL types use
    // R52Write2FPALU_F5. Three R52Read_F1 entries follow the write each time.
    // NOTE(review): the R52Write*/R52Read* records are defined outside this
    // section; the _F1/_F5 suffixes presumably name pipeline stages -- confirm.
728 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
729 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
730 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
731
    // VABD / VABDL: the 64-bit VABD types use R52WriteFPALU_F4; the 128-bit
    // VABD types and VABDL use R52Write2FPALU_F4, each with two R52Read_F1.
    // VABDL produces a long (widened) result, so its instruction names carry
    // the result types v8i16, v4i32 and v2i64 -- the same set the VABAL entry
    // in this file uses.
732 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
733 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
734 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
735
    // VABS: only the three 128-bit integer types are listed in this entry.
736 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
737
    // VADD / VSUB: 64-bit types use R52WriteFPALU_F4, 128-bit types use
    // R52Write2FPALU_F4. The four narrowing forms (VADDHN, VRADDHN, VSUBHN,
    // VRSUBHN) use R52Write2FPALU_F5. All three entries read R52Read_F2 twice.
738 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
739                                (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
740 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
741                                 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
742 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
743                                (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
744
    // Long / wide add and subtract: the four patterns carry no type suffix.
    // NOTE(review): assumes instregex anchors only at the start of the name,
    // so each pattern covers every name beginning with it -- confirm against
    // the TableGen version in use.
745 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
746                                             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
747
    // Bitwise VAND / VBIC / VEOR: "d" forms use R52WriteFPALU_F3 and "q"
    // forms use R52Write2FPALU_F3.
748 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
749 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
750
    // VBICi (immediate) and the VBIF / VBIT / VBSL family. The 64-bit and "d"
    // forms use R52WriteFPALU_F3; the 128-bit and "q" forms use
    // R52Write2FPALU_F3. Each pattern is listed exactly once: TableGen
    // versions that check for overlapping InstRW defs reject a second entry
    // for the same instructions in one SchedModel.
751 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
752 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
753
754 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
755 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
758
    // Compare/count family -> R52WriteFPALU_F3; conversions (VCVT, VSITO,
    // VUITO, VTO) -> R52WriteFPALU_F5. These patterns carry no type suffix.
    // NOTE(review): assumes instregex anchors only at the start of the name,
    // so each pattern covers every name beginning with it -- confirm.
759 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
760       (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
761 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
762       (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
763
    // VDUP reads R52Read_ISS, VDUPLN reads R52Read_F1. In both, "d" forms use
    // R52WriteFPALU_F3 and "q" forms use R52Write2FPALU_F3.
764 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
765 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
766 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
767 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
768
    // VEXT: the "d" entry also carries VSEL; only the "q" entry lists a
    // 64-bit element size.
769 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
770 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
771
    // VFMA / VFMS (f and h element kinds): "d" -> R52WriteFPMAC_F5,
    // "q" -> R52Write2FPMAC_F5, each followed by three R52Read_F1.
772 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
773 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
774
    // VHADD / VHSUB: 64-bit types -> R52WriteFPALU_F4, 128-bit types ->
    // R52Write2FPALU_F4, both reading R52Read_F2 twice.
775 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
776 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
777
    // VLDM is the only entry here that uses the R52WriteVLDM write. Its
    // pattern ends in "$", so names with any further suffix after IA/DB are
    // not matched by this entry.
778 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
    // The entries below list bare mnemonic prefixes with no type suffix and no
    // d/q split, except VQABS and VQADD/VQSUB, which are split by vector width
    // (64-bit -> R52WriteFPALU_F5, 128-bit -> R52Write2FPALU_F5).
    // NOTE(review): assumes instregex anchors only at the start of the name,
    // so a prefix such as "VMOV" covers every name beginning with it -- confirm.
779 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
780 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
781 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
782 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
783 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
784 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
785 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
786 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
787 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
788 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
789                   (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
790 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
791                   (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
792 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
793 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
794 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
795                  (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
796 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
797 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
798
799 //---
800 // VLDx. Vector Loads
801 //---
    // Every VLD1 entry reads R52Read_ISS. Entries for writeback forms (names
    // containing wb, WB or _UPD) list R52WriteAdr as a second write after the
    // R52WriteVLDnMem write.
    // NOTE(review): R52WriteAdr is defined outside this section; presumably it
    // covers the updated base-address register -- confirm.
802 // 1-element structure load
    // Plain and writeback forms: d -> R52WriteVLD1Mem, q -> R52WriteVLD2Mem,
    // T -> R52WriteVLD3Mem, Q -> R52WriteVLD4Mem. All lane (LN) and DUP forms
    // use R52WriteVLD1Mem.
803 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
804 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
805 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
806 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
807 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
808 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
809
810 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
811 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
812 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
813
814 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
815 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
816 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
817 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
818 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
819 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
820
821 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
822 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
823 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
824 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
825
826 // 2-element structure load
    // (d|b) forms use R52WriteVLD2Mem; q forms, including the q pseudos, use
    // R52WriteVLD4Mem; every lane (LN) and DUP form uses R52WriteVLD1Mem.
    // Writeback forms (wb, WB, _UPD) add R52WriteAdr as a second write. All
    // entries read R52Read_ISS.
827 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
828 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
829 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
830 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
831 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
832 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
833
834 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
835 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
836 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
837 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
838 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
839 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
840 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
841 def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
842
843 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
844 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
845
846 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
847 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
848
849 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
850 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
851 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
852 def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
853
854 // 3-element structure load
    // Whole-structure (d|q) forms, including the pseudos, use
    // R52WriteVLD3Mem; lane (LN) and DUP forms use R52WriteVLD2Mem. Writeback
    // forms (WB, _UPD) add R52WriteAdr as a second write. All entries read
    // R52Read_ISS. Each pattern is listed exactly once: TableGen versions
    // that check for overlapping InstRW defs reject a second entry for the
    // same instructions in one SchedModel.
855 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
856 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
857 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
858 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
859 def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
860 def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
861
862 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
863 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
864 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
865
866 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
867 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
869 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
870
871 // 4-element structure load
    // Whole-structure (d|q) forms, including the pseudos, use
    // R52WriteVLD4Mem; lane (LN) and DUP forms use R52WriteVLD2Mem. Writeback
    // forms (WB, _UPD) add R52WriteAdr as a second write. All entries read
    // R52Read_ISS.
872 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
873 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
874 def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
875 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
876 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
877 def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
878
879
880 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
881 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
882 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
883 def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
884 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
885 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
886 def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
887
888 //---
889 // VSTx. Vector Stores
890 //---
    // Every VST1 entry reads R52Read_ISS and R52Read_F2. Entries for
    // writeback forms (names containing wb, WB or _UPD) list R52WriteAdr as a
    // second write after the R52WriteVSTnMem write.
891 // 1-element structure store
    // Plain and writeback forms: d -> R52WriteVST1Mem, q -> R52WriteVST2Mem,
    // T -> R52WriteVST3Mem, Q -> R52WriteVST4Mem. All lane (LN) forms use
    // R52WriteVST1Mem.
892 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
893 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
894 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
895 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
896 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
897 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
898
899 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
900 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
901 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
902
903 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
904 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
905 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
906 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
907 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
908 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
909
910 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
911 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
912 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
913
914 // 2-element structure store
    // (d|b) forms use R52WriteVST2Mem; q forms, including the q pseudos, use
    // R52WriteVST4Mem; every lane (LN) form uses R52WriteVST1Mem. Writeback
    // forms (wb, WB, _UPD) add R52WriteAdr as a second write. All entries
    // read R52Read_ISS and R52Read_F2.
915 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
916 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
917 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
918
919 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
920 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
921 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
922 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
923 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
924 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
925
926 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
927 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
928 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
929
930 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
931 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
932 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
933 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
934 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
935 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
936
937 // 3-element structure store
    // Whole-structure forms use R52WriteVST4Mem; lane (LN) forms use
    // R52WriteVST2Mem. Writeback forms (WB, _UPD) add R52WriteAdr as a second
    // write. All entries read R52Read_ISS and R52Read_F2.
    // NOTE(review): the non-writeback pseudo pattern below covers only "d"
    // ("VST3d...(oddP|P)seudo$"), whereas the writeback pseudo pattern and the
    // corresponding VLD3 pattern use (d|q). If a non-writeback q pseudo
    // exists it is not covered by this section -- confirm whether that is
    // intended.
938 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
939 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
940 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
941
942 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
943 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
944 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
945 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
946 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
947 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
948
949 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
950 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
951 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
952
953 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
954 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
955 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
956 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
957 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
958 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
959
960 // 4-element structure store
    // Whole-structure forms use R52WriteVST5Mem; lane (LN) forms use
    // R52WriteVST2Mem. Writeback forms (WB, _UPD) add R52WriteAdr as a second
    // write. All entries read R52Read_ISS and R52Read_F2.
    // NOTE(review): the non-writeback pseudo pattern below is
    // "VST4d...Pseudo$" (d only, no oddP alternative), whereas the writeback
    // pseudo pattern and the corresponding VLD4 pattern use (d|q) with
    // (oddP|P)seudo. Any non-writeback q / odd pseudo is not covered by this
    // section -- confirm whether that is intended.
961 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
962 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
963 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
964
965 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
966 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
967 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
968 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
969 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
970 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
971
972 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
973 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
974 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
975
976 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
977 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
978 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
979 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
980 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
981 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
982
983 } // R52 SchedModel