1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
12 //===----------------------------------------------------------------------===//
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16 // an 8-stage pipeline. It can issue at most two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
22 def CortexR52Model : SchedMachineModel {
23 let MicroOpBufferSize = 0; // R52 is in-order processor
24 let IssueWidth = 2; // 2 micro-ops dispatched per cycle
25 let LoadLatency = 1; // Optimistic, assuming no misses
26 let MispredictPenalty = 8; // A branch direction mispredict, including PFU
27 let PostRAScheduler = 1; // Enable PostRA scheduler pass.
28 let CompleteModel = 0; // Covers instructions applicable to cortex-r52.
32 //===----------------------------------------------------------------------===//
33 // Define each kind of processor resource and number available.
35 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
36 // Cortex-R52 is an in-order processor.
38 def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
39 def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
40 def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
41 def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
42 def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
43 def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
44 def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL
45 def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
47 // Cortex-R52 specific SchedReads
// Processor-specific operand reads. The suffix names the pipeline stage in
// which the operand is read; the ReadAdvance entries under "Forwarding
// information" give the matching advance (0 for ISS/F0, 1 for EX1/F1,
// 2 for EX2/F2).
// NOTE(review): R52Read_WRI has no ReadAdvance among the forwarding entries
// visible in this file - confirm whether that is intentional.
48 def R52Read_ISS : SchedRead;
49 def R52Read_EX1 : SchedRead;
50 def R52Read_EX2 : SchedRead;
51 def R52Read_WRI : SchedRead;
52 def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
53 def R52Read_F1 : SchedRead;
54 def R52Read_F2 : SchedRead;
57 //===----------------------------------------------------------------------===//
58 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
60 let SchedModel = CortexR52Model in {
62 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
63 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
66 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
69 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
71 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
73 // Multiply - aliased to sub-target specific later
75 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
76 def : WriteRes<WriteDIV, [R52UnitDiv]> {
77 let Latency = 8; let ResourceCycles = [8]; // non-pipelined
80 // Branches - LR written in Late EX2
81 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
82 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
83 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
86 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
88 // Integer pipeline by-passes
89 def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
90 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
91 def : ReadAdvance<ReadMUL, 0>;
92 def : ReadAdvance<ReadMAC, 0>;
94 // Floating-point. Map target-defined SchedReadWrites to subtarget
95 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
97 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
101 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
102 let Latency = 11; // as it is internally two insns (MUL then ADD)
105 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
106 R52UnitFPALU, R52UnitFPALU]> {
110 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
111 let Latency = 7; // FP div takes fixed #cycles
112 let ResourceCycles = [7]; // is not pipelined
115 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
117 let ResourceCycles = [17];
120 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
121 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
123 // Overridden via InstRW for this processor.
124 def : WriteRes<WriteVST1, []>;
125 def : WriteRes<WriteVST2, []>;
126 def : WriteRes<WriteVST3, []>;
127 def : WriteRes<WriteVST4, []>;
129 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
130 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
132 //===----------------------------------------------------------------------===//
133 // Subtarget-specific SchedReadWrites.
135 // Forwarding information - based on when an operand is read
136 def : ReadAdvance<R52Read_ISS, 0>;
137 def : ReadAdvance<R52Read_EX1, 1>;
138 def : ReadAdvance<R52Read_EX2, 2>;
139 def : ReadAdvance<R52Read_F0, 0>;
140 def : ReadAdvance<R52Read_F1, 1>;
141 def : ReadAdvance<R52Read_F2, 2>;
144 // Cortex-R52 specific SchedWrites for use with InstRW
145 def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
146 def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
147 let Latency = 4; let NumMicroOps = 0;
149 def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
150 let Latency = 8; let ResourceCycles = [8]; // not pipelined
// Loads and stores both occupy the single load/store unit (R52UnitLd).
152 def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
153 def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
// Address write-back and condition-code results: zero latency and no
// processor resource consumed.
154 def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; }
155 def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; }
// ALU results, one def per result stage: latency 2 (EX1), 3 (EX2), 4 (WRI).
156 def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
157 def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
158 def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
// Same latencies as the _EX2/_WRI ALU writes, but with an empty resource list.
160 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
161 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
163 // Alias generics to sub-target specific
164 def : SchedAlias<WriteMUL16, R52WriteMAC>;
165 def : SchedAlias<WriteMUL32, R52WriteMAC>;
166 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
167 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
168 def : SchedAlias<WriteMAC16, R52WriteMAC>;
169 def : SchedAlias<WriteMAC32, R52WriteMAC>;
170 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
171 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
172 def : SchedAlias<WritePreLd, R52WriteLd>;
173 def : SchedAlias<WriteLd, R52WriteLd>;
174 def : SchedAlias<WriteST, R52WriteST>;
176 def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
177 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
180 def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
181 def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
184 def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
185 def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
188 def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
189 def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
192 def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
193 let Latency = 11; // as it is internally two insns (MUL then ADD)
195 def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
196 R52UnitFPALU, R52UnitFPALU]> {
200 def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
201 def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
203 //===----------------------------------------------------------------------===//
204 // Floating-point. Map target defined SchedReadWrites to processor specific ones
206 def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
207 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
208 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
209 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
211 //===----------------------------------------------------------------------===//
212 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
214 def : InstRW<[WriteALU], (instrs COPY)>;
216 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
217 (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
218 "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
220 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
221 (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
222 "t2MOVi", "t2MOV_ga_dyn")>;
223 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
224 (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
225 def : InstRW<[R52WriteLd,R52Read_ISS],
226 (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
228 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
230 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
231 (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
232 "(t|t2)UBFX", "(t|t2)SBFX")>;
234 // Saturating arithmetic
235 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
236 (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
237 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
238 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
239 "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
240 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
241 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
243 // Parallel arithmetic
244 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
245 (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
246 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
247 "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
248 "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
251 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
252 (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
253 "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
254 "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
255 "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
256 "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
257 "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
259 // Sum of Absolute Difference
260 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
261 (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
264 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
265 (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
266 "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
267 "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
268 "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
270 // Multiply Accumulate
271 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
272 // The store pipeline is used partly for 64-bit operations.
// All multiply-accumulate and long-multiply forms use the MAC write with
// three operands read at ISS.
// "SMLALBB"/"t2SMLALBB" were previously spelled "MLALBB"/"t2MLALBB", which
// name no instruction; they now match their SMLALBT/SMLALTB/SMLALTT siblings.
273 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
274 (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
275 "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
276 "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
277 "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
278 "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
279 "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
280 "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
281 "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
282 "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
283 "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
284 "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
285 "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
286 "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
288 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
289 (instregex "t2SDIV", "t2UDIV")>;
291 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
292 // However, that's non-trivial to specify, so we keep it uniform
293 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
294 (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
295 "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
296 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
297 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
298 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
299 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
300 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
301 (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
302 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
303 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
304 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
305 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
306 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
308 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
309 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
311 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
312 "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
313 "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
314 "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
// Register-register ALU forms: result in EX2, both operands read in EX1.
// The CRC pattern is written as the plain prefix "CRC" (mirroring "t2CRC");
// the former "CRC*" quantified the final 'C' and was effectively the
// prefix "CR".
316 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
317 "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
318 "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
// Shifted-by-immediate ALU forms: result in EX2, first operand read in EX1,
// shifted operand read at ISS.
// The Thumb2 add pattern uses (C|D) like the ARM one; the previous "(|D)"
// had an empty alternative and left out the ADC form.
320 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
321 "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
322 "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
// Shifted-by-register ALU forms: result in EX2, first operand read in EX1,
// shifted operand and shift amount read at ISS.
// "ORRrsr" was previously misspelled "ORRrsrr", which matches no instruction.
324 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
325 (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
326 "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
// Address generation and move-immediate style forms: result available in EX1,
// no operand reads listed.
// NOTE(review): "MOVST?i16*" parses as "MOVS", optional 'T', "i1", then zero
// or more '6' - presumably intended to cover the 16-bit immediate moves;
// confirm the intended opcode names before changing.
328 def : InstRW<[R52WriteALU_EX1],
329 (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
331 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
332 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
333 (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
// Compares, one def per operand form (ri, rr, rsi, rsr). All use R52WriteCC
// (zero latency, no resource). The first operand is read in EX1; the extra
// operands of the shifted forms are read at ISS.
335 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
336 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
337 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
338 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
340 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
341 (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
343 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
345 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
346 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
348 // Integer Load, Multiple.
349 foreach Lat = 3-25 in {
350 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
353 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
358 foreach NAddr = 1-16 in {
359 def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
361 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
362 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
363 def R52WriteILDM : SchedWriteVariant<[
364 SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
366 SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
368 SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
369 R52WriteILDM6Cy, R52WriteILDM7Cy]>,
371 SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
372 R52WriteILDM6Cy, R52WriteILDM7Cy,
374 SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
375 R52WriteILDM6Cy, R52WriteILDM7Cy,
376 R52WriteILDM8Cy, R52WriteILDM9Cy]>,
378 SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
379 R52WriteILDM6Cy, R52WriteILDM7Cy,
380 R52WriteILDM8Cy, R52WriteILDM9Cy,
382 SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
383 R52WriteILDM6Cy, R52WriteILDM7Cy,
384 R52WriteILDM8Cy, R52WriteILDM9Cy,
385 R52WriteILDM10Cy, R52WriteILDM11Cy]>,
387 SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
388 R52WriteILDM6Cy, R52WriteILDM7Cy,
389 R52WriteILDM8Cy, R52WriteILDM9Cy,
390 R52WriteILDM10Cy, R52WriteILDM11Cy,
392 SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
393 R52WriteILDM6Cy, R52WriteILDM7Cy,
394 R52WriteILDM8Cy, R52WriteILDM9Cy,
395 R52WriteILDM10Cy, R52WriteILDM11Cy,
396 R52WriteILDM12Cy, R52WriteILDM13Cy]>,
398 SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
399 R52WriteILDM6Cy, R52WriteILDM7Cy,
400 R52WriteILDM8Cy, R52WriteILDM9Cy,
401 R52WriteILDM10Cy, R52WriteILDM11Cy,
402 R52WriteILDM12Cy, R52WriteILDM13Cy,
404 SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
405 R52WriteILDM6Cy, R52WriteILDM7Cy,
406 R52WriteILDM8Cy, R52WriteILDM9Cy,
407 R52WriteILDM10Cy, R52WriteILDM11Cy,
408 R52WriteILDM12Cy, R52WriteILDM13Cy,
409 R52WriteILDM14Cy, R52WriteILDM15Cy]>,
411 SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
412 R52WriteILDM6Cy, R52WriteILDM7Cy,
413 R52WriteILDM8Cy, R52WriteILDM9Cy,
414 R52WriteILDM10Cy, R52WriteILDM11Cy,
415 R52WriteILDM12Cy, R52WriteILDM13Cy,
416 R52WriteILDM14Cy, R52WriteILDM15Cy,
418 SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
419 R52WriteILDM6Cy, R52WriteILDM7Cy,
420 R52WriteILDM8Cy, R52WriteILDM9Cy,
421 R52WriteILDM10Cy, R52WriteILDM11Cy,
422 R52WriteILDM12Cy, R52WriteILDM13Cy,
423 R52WriteILDM14Cy, R52WriteILDM15Cy,
424 R52WriteILDM16Cy, R52WriteILDM17Cy]>,
426 SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
427 R52WriteILDM6Cy, R52WriteILDM7Cy,
428 R52WriteILDM8Cy, R52WriteILDM9Cy,
429 R52WriteILDM10Cy, R52WriteILDM11Cy,
430 R52WriteILDM12Cy, R52WriteILDM13Cy,
431 R52WriteILDM14Cy, R52WriteILDM15Cy,
432 R52WriteILDM16Cy, R52WriteILDM17Cy,
// 16 addresses. This entry previously repeated R52ILDMAddr15Pred, which made
// it unreachable and sent 16-register loads to the NoSchedPred default.
434 SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
435 R52WriteILDM6Cy, R52WriteILDM7Cy,
436 R52WriteILDM8Cy, R52WriteILDM9Cy,
437 R52WriteILDM10Cy, R52WriteILDM11Cy,
438 R52WriteILDM12Cy, R52WriteILDM13Cy,
439 R52WriteILDM14Cy, R52WriteILDM15Cy,
440 R52WriteILDM16Cy, R52WriteILDM17Cy,
441 R52WriteILDM18Cy, R52WriteILDM19Cy]>,
443 // Unknown number of registers, just use resources for two registers.
444 SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
445 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
446 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
447 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
448 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
449 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
450 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
451 R52WriteILDM18Cy, R52WriteILDM19Cy]>
452 ]> { let Variadic=1; }
454 // Integer Store, Multiple
455 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
459 foreach NumAddr = 1-16 in {
460 def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
462 def R52WriteISTM : SchedWriteVariant<[
463 SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
464 SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
465 SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
466 SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
467 SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
468 SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
469 SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
470 SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
471 SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
472 SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
473 SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
474 SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
475 SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
476 SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
477 SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
478 // Unknown number of registers, just use resources for two registers.
479 SchedVar<NoSchedPred, [R52WriteISTM2]>
482 def : InstRW<[R52WriteILDM, R52Read_ISS],
483 (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
484 "(t|sys)LDM(IA|DA|DB|IB)$")>;
485 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
486 (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
487 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
488 (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
490 // Integer Store, Single Element
491 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
492 (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
493 "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
494 "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
496 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
497 (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
498 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
499 "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
500 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
502 // Integer Store, Dual
503 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
504 (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
505 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
506 (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
508 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
509 (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
510 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
511 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
514 // LDRLIT pseudo instructions, they expand to LDR + PICADD
515 def : InstRW<[R52WriteLd],
516 (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
517 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
518 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
522 //===----------------------------------------------------------------------===//
523 // VFP, Floating Point Support
524 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
525 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
527 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
528 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
529 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
531 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
532 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
534 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
535 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
537 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
538 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
541 //===----------------------------------------------------------------------===//
544 // vector multiple load stores
545 foreach NumAddr = 1-16 in {
546 def R52LMAddrPred#NumAddr :
547 SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
549 foreach Lat = 1-32 in {
550 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
554 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
555 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
557 let NumMicroOps = Num;
558 let ResourceCycles = [Num];
561 def R52WriteVLDM : SchedWriteVariant<[
563 SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
565 SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
569 SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
571 SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
575 SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
578 SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
583 SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
584 R52WriteLM7Cy, R52WriteLM8Cy,
586 SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
587 R52WriteLM7Cy, R52WriteLM8Cy,
591 SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
592 R52WriteLM7Cy, R52WriteLM8Cy,
595 SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
596 R52WriteLM7Cy, R52WriteLM8Cy,
601 SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
602 R52WriteLM7Cy, R52WriteLM8Cy,
603 R52WriteLM9Cy, R52WriteLM10Cy,
605 SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
606 R52WriteLM7Cy, R52WriteLM8Cy,
607 R52WriteLM9Cy, R52WriteLM10Cy,
611 SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
612 R52WriteLM7Cy, R52WriteLM8Cy,
613 R52WriteLM9Cy, R52WriteLM10Cy,
616 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
617 R52WriteLM7Cy, R52WriteLM8Cy,
618 R52WriteLM9Cy, R52WriteLM10Cy,
// Final pair of entries (15 and 16 operands). They previously used
// R52LMAddrPred14 (a repeat of the entry above) and R52LMAddrPred15, which
// left the first of them unreachable and R52LMAddrPred16 unused.
623 SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
624 R52WriteLM7Cy, R52WriteLM8Cy,
625 R52WriteLM9Cy, R52WriteLM10Cy,
626 R52WriteLM11Cy, R52WriteLM12Cy,
628 SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
629 R52WriteLM7Cy, R52WriteLM8Cy,
630 R52WriteLM9Cy, R52WriteLM10Cy,
631 R52WriteLM11Cy, R52WriteLM12Cy,
633 // unknown number of reg.
634 SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
635 R52WriteLM7Cy, R52WriteLM8Cy,
636 R52WriteLM9Cy, R52WriteLM10Cy,
637 R52WriteLM11Cy, R52WriteLM12Cy,
639 ]> { let Variadic=1;}
641 // variable stores. Cannot dual-issue
642 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
645 let ResourceCycles = [1];
647 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
650 let ResourceCycles = [2];
652 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
655 let ResourceCycles = [3];
657 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
660 let ResourceCycles = [4];
662 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
664 let NumMicroOps = 10;
665 let ResourceCycles = [5];
667 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
669 let NumMicroOps = 12;
670 let ResourceCycles = [6];
672 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
674 let NumMicroOps = 14;
675 let ResourceCycles = [7];
677 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
679 let NumMicroOps = 16;
680 let ResourceCycles = [8];
682 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
684 let NumMicroOps = 18;
685 let ResourceCycles = [9];
687 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
689 let NumMicroOps = 20;
690 let ResourceCycles = [10];
692 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
694 let NumMicroOps = 22;
695 let ResourceCycles = [11];
698 def R52WriteSTM : SchedWriteVariant<[
699 SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
700 SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
701 SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
702 SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
703 SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
704 SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
705 SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
706 SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
707 SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
708 SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
709 SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
710 SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
711 SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
712 SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
713 SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
714 SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
715 // unknown number of registers, just use resources for two
716 SchedVar<NoSchedPred, [R52WriteSTM6]>
719 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
720 // another instruction in slot-1, but only in the last issue.
721 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
722 def : WriteRes<WriteVLD2, [R52UnitLd]> {
725 let ResourceCycles = [2];
728 def : WriteRes<WriteVLD3, [R52UnitLd]> {
731 let ResourceCycles = [3];
734 def : WriteRes<WriteVLD4, [R52UnitLd]> {
737 let ResourceCycles = [4];
740 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
743 let ResourceCycles = [1];
745 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
748 let ResourceCycles = [2];
750 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
753 let ResourceCycles = [3];
755 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
758 let ResourceCycles = [4];
760 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
763 let ResourceCycles = [5];
767 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
768 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
769 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
771 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
773 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
775 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
777 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
778 (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
779 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
780 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
781 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
782 (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
784 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
785 (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
787 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
788 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
// VBIC (immediate): 64-bit forms use one FP ALU, 128-bit forms use both.
790 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
791 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
// Bitwise insert/select: d-register forms use one FP ALU, q-register forms both.
793 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
794 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
// A second, identical InstRW for "VBICi(v8i16|v4i32)" used to follow here; it
// duplicated the definition above and has been removed.
798 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
799 (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
800 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
801 (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
803 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
804 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
805 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
806 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
808 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
809 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
811 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
812 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
814 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
815 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
// VLDM, S and D variants, IA and DB addressing (the '$' excludes longer
// opcode names such as writeback forms).
def : InstRW<[R52WriteVLDM],
             (instregex "VLDM[SD](IA|DB)$")>;

// Maximum/minimum, including the pairwise forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX",
                        "VMIN",
                        "VPMAX",
                        "VPMIN")>;

// OR, OR-NOT and element reversal.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR",
                        "VORN",
                        "VREV")>;

// VMRS lists no reads.
def : InstRW<[R52WriteNoRSRC_WRI],
             (instregex "VMRS")>;

// Negate.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG")>;

// Pairwise add, plus the accumulate-long and long forms.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL",
                        "VPADDL")>;
// Saturating absolute value, split by vector type.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating add/subtract, split by vector type.
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating doubling multiply-accumulate long and multiply.
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL",
                        "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL",
                        "VQRDMUL")>;

// Saturating narrow, negate and shifts.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN",
                        "VQNEG",
                        "VQSHL",
                        "VQSHRN")>;

// Rounding shifts, plus opcodes whose names begin with VTB.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL",
                        "VRSHR",
                        "VRSHRN",
                        "VTB")>;

// Swap, transpose, unzip and zip.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP",
                        "VTRN",
                        "VUZP",
                        "VZIP")>;
// VSTx. Vector Stores
// Every store record reads R52Read_ISS and R52Read_F2; only the write differs
// between the variants below.

// 1-element structure store, no writeback. The trailing '$' keeps these
// patterns from also matching the longer writeback opcode names.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;

// Single-lane (LN) forms.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo$")>;
// 1-element structure store, writeback forms: same memory writes as the
// non-writeback records, with R52WriteAdr added as a second write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;

// Single-lane (LN) writeback forms.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
// 2-element structure store, no writeback ('$' excludes the writeback
// opcode names).
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)Pseudo$")>;

// Single-lane (LN) forms; the q-suffixed patterns list only 16 and 32.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo$")>;
// 2-element structure store, writeback forms: R52WriteAdr is added as a
// second write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)PseudoWB")>;

// Single-lane (LN) writeback forms.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo_UPD")>;
// 3-element structure store, no writeback ('$' excludes the writeback
// opcode names).
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// Pseudo forms. The register class is (d|q), matching the writeback pseudo
// pattern "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$"; with only "d" here the
// "oddPseudo" alternative could never match a q-register pseudo.
// NOTE(review): assumes the non-writeback q pseudos are named
// VST3q{8,16,32}oddPseudo - confirm against the NEON instruction definitions.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane (LN) forms; the q-suffixed patterns list only 16 and 32.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo$")>;
// 3-element structure store, writeback forms: R52WriteAdr is added as a
// second write.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// Single-lane (LN) writeback forms.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
// 4-element structure store, no writeback ('$' excludes the writeback
// opcode names).
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// Pseudo forms. The pattern mirrors the writeback pseudo pattern
// "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD" so that q-register and "oddPseudo"
// variants receive the same write as the d-register pseudos.
// NOTE(review): assumes the non-writeback q pseudos are named
// VST4q{8,16,32}oddPseudo - confirm against the NEON instruction definitions.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;

// Single-lane (LN) forms; the q-suffixed patterns list only 16 and 32.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo$")>;
// 4-element structure store, writeback forms: R52WriteAdr is added as a
// second write.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// Single-lane (LN) writeback forms.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo_UPD")>;