1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
12 //===----------------------------------------------------------------------===//
14 // ===---------------------------------------------------------------------===//
15 // The Cortex-R52 is an in-order pipelined superscalar microprocessor with
16 // an 8-stage pipeline. It can issue a maximum of two instructions in each cycle.
17 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
22 def CortexR52Model : SchedMachineModel {
23 let MicroOpBufferSize = 0; // R52 is in-order processor
24 let IssueWidth = 2; // 2 micro-ops dispatched per cycle
25 let LoadLatency = 1; // Optimistic, assuming no misses
26 let MispredictPenalty = 8; // A branch direction mispredict, including PFU
27 let CompleteModel = 0; // Covers instructions applicable to cortex-r52.
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available.
34 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
35 // Cortex-R52 is an in-order processor.
// One ProcResource per pipeline; the template argument is the number of
// identical units available. BufferSize = 0 is used for every unit because
// the core issues in order.
// NOTE(review): in the visible part of this file the FP divide and square-root
// WriteRes entries reserve R52UnitDiv rather than R52UnitFPDIV - confirm
// whether R52UnitFPDIV is meant to be used there.
37 def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
38 def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
39 def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
40 def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
41 def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
42 def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
43 def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL
44 def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
46 // Cortex-R52 specific SchedReads
// Operand-read classes, named after the pipeline stage in which the operand
// is needed. The ReadAdvance entries further down assign ISS/F0 = 0,
// EX1/F1 = 1 and EX2/F2 = 2 cycles; no ReadAdvance for R52Read_WRI is visible
// in this part of the file.
47 def R52Read_ISS : SchedRead;
48 def R52Read_EX1 : SchedRead;
49 def R52Read_EX2 : SchedRead;
50 def R52Read_WRI : SchedRead;
51 def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
52 def R52Read_F1 : SchedRead;
53 def R52Read_F2 : SchedRead;
56 //===----------------------------------------------------------------------===//
57 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
59 let SchedModel = CortexR52Model in {
61 // ALU - Write occurs in Late EX2 (independent of whether shift was required)
62 def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
63 def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
64 def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
65 def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
// Compares: each form reserves an ALU but is given zero result latency.
68 def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
69 def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
70 def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
72 // Multiply - aliased to sub-target specific later
74 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
75 def : WriteRes<WriteDIV, [R52UnitDiv]> {
76 let Latency = 8; let ResourceCycles = [8]; // non-pipelined
79 // Branches - LR written in Late EX2
// Direct branches and calls reserve the branch unit; table branches
// (WriteBrTbl) reserve an ALU instead. All three are modelled with zero latency.
// NOTE(review): the section header says LR is written in Late EX2, yet
// WriteBrL has Latency = 0 - confirm that a zero link-register latency is intended.
80 def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
81 def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
82 def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
85 def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
87 // Integer pipeline by-passes
88 def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
89 def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
90 def : ReadAdvance<ReadMUL, 0>;
91 def : ReadAdvance<ReadMAC, 0>;
93 // Floating-point. Map target-defined SchedReadWrites to subtarget
94 def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }
96 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
100 def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
101 let Latency = 11; // as it is internally two insns (MUL then ADD)
104 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
105 R52UnitFPALU, R52UnitFPALU]> {
109 def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
110 let Latency = 7; // FP div takes fixed #cycles
111 let ResourceCycles = [7]; // is not pipelined
114 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
116 let ResourceCycles = [17];
// FP square root: issued to the same R52UnitDiv resource as the FP divides above.
// NOTE(review): unlike the divide entries, no ResourceCycles are set here, so
// only the default reservation applies - confirm whether sqrt should also
// hold the unit for its full latency.
119 def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
120 def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }
122 // Overridden via InstRW for this processor.
123 def : WriteRes<WriteVST1, []>;
124 def : WriteRes<WriteVST2, []>;
125 def : WriteRes<WriteVST3, []>;
126 def : WriteRes<WriteVST4, []>;
128 def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
129 def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1
131 //===----------------------------------------------------------------------===//
132 // Subtarget-specific SchedReadWrites.
134 // Forwarding information - based on when an operand is read
135 def : ReadAdvance<R52Read_ISS, 0>;
136 def : ReadAdvance<R52Read_EX1, 1>;
137 def : ReadAdvance<R52Read_EX2, 2>;
138 def : ReadAdvance<R52Read_F0, 0>;
139 def : ReadAdvance<R52Read_F1, 1>;
140 def : ReadAdvance<R52Read_F2, 2>;
143 // Cortex-R52 specific SchedWrites for use with InstRW
144 def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
145 def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
146 let Latency = 4; let NumMicroOps = 0;
148 def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
149 let Latency = 8; let ResourceCycles = [8]; // not pipelined
// Loads and stores both reserve the load/store unit, with a latency of 4.
151 def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
152 def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
// Address write-back and flag writes: no resource reserved, zero latency.
153 def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; }
154 def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; }
// ALU writes, suffixed with the stage that produces the result:
// EX1 = 2, EX2 = 3 and WRI = 4 cycles.
155 def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
156 def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
157 def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
// Same EX2/WRI latencies, but without reserving any processor resource.
159 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
160 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
162 // Alias generics to sub-target specific
// Every integer multiply and multiply-accumulate class maps to R52WriteMAC;
// the high half of a 64-bit result maps to R52WriteMACHi.
163 def : SchedAlias<WriteMUL16, R52WriteMAC>;
164 def : SchedAlias<WriteMUL32, R52WriteMAC>;
165 def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
166 def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
167 def : SchedAlias<WriteMAC16, R52WriteMAC>;
168 def : SchedAlias<WriteMAC32, R52WriteMAC>;
169 def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
170 def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
// Preloads and loads share R52WriteLd; stores use R52WriteST.
171 def : SchedAlias<WritePreLd, R52WriteLd>;
172 def : SchedAlias<WriteLd, R52WriteLd>;
173 def : SchedAlias<WriteST, R52WriteST>;
175 def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
176 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
179 def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
180 def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
183 def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
184 def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
187 def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
188 def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
191 def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
192 let Latency = 11; // as it is internally two insns (MUL then ADD)
194 def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
195 R52UnitFPALU, R52UnitFPALU]> {
199 def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
200 def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
202 //===----------------------------------------------------------------------===//
203 // Floating-point. Map target defined SchedReadWrites to processor specific ones
205 def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
206 def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
207 def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
208 def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
210 //===----------------------------------------------------------------------===//
211 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
213 def : InstRW<[WriteALU], (instrs COPY)>;
// Sign/zero extension: result written as R52WriteALU_EX2; the first source is
// read in EX1 and the second in ISS.
215 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
216 (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
217 "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
219 def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
220 (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
221 "t2MOVi", "t2MOV_ga_dyn")>;
222 def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
223 (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
224 def : InstRW<[R52WriteLd,R52Read_ISS],
225 (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
227 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
229 def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
230 (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
231 "(t|t2)UBFX", "(t|t2)SBFX")>;
233 // Saturating arithmetic
234 def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
235 (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
236 "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
237 "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
238 "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
239 "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
240 "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
242 // Parallel arithmetic
243 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
244 (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
245 "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
246 "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
247 "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
250 def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
251 (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
252 "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
253 "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
254 "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
255 "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
256 "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
258 // Sum of Absolute Difference
259 def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
260 (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
263 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
264 (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
265 "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
266 "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
267 "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
269 // Multiply Accumulate
270 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
271 // The store pipeline is used partly for 64-bit operations.
// All multiply-accumulate forms (including the 64-bit/long ones) use the
// single MAC unit and read their three sources in ISS.
// The SMLALBB patterns carry their leading 'S' like the neighbouring
// SMLALBT/SMLALTB/SMLALTT entries; instregex patterns are matched from the
// start of the opcode name, so a pattern beginning "MLALBB" selects nothing.
272 def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
273 (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
274 "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
275 "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
276 "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
277 "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
278 "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
279 "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
280 "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
281 "SMLALS", "UMLALS", "SMLAL", "UMLAL", "SMLALBB", "SMLALBT",
282 "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
283 "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2SMLALBB",
284 "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
285 "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
// Integer divide: both operands are read in ISS and the result uses
// R52WriteDIV, which holds the (non-pipelined) divider for its full latency.
// NOTE(review): only the t2-prefixed opcodes are listed; confirm that the
// ARM-mode SDIV/UDIV opcodes are covered elsewhere.
287 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
288 (instregex "t2SDIV", "t2UDIV")>;
290 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
291 // However, that's non-trivial to specify, so we keep it uniform
292 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
293 (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
294 "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
295 "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
296 "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
297 "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
298 "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
299 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
300 (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
301 "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
302 "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
303 "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
304 "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
305 "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
307 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
308 def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
310 def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
311 "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
312 "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
313 "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
// Register-register ALU operations: result in EX2, both sources read in EX1.
// The CRC pattern is written as a plain prefix ("CRC"), mirroring "t2CRC";
// as a regular expression "CRC*" would mean "CR" followed by any number of 'C'.
315 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
316 "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
317 "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
// ALU operations with an immediate-shifted register: result in EX2, the plain
// source is read in EX1 and the shifted source in ISS.
// The Thumb2 add pattern uses AD(C|D), matching the ARM-mode pattern on the
// first line and the other t2AD(C|D) patterns in this file.
319 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
320 "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
321 "t2AD(C|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
// ALU operations with a register-shifted register: result in EX2, the plain
// source is read in EX1, the shifted source and the shift amount in ISS.
// "ORRrsr" follows the same naming as the other *rsr entries in this list.
323 def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
324 (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
325 "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;
// Address generation and immediate moves: result as R52WriteALU_EX1, with no
// operand-read classes listed.
// NOTE(review): "MOVST?i16*" is a regular expression (optional 'T', then
// "i1" followed by any number of '6'), not a glob - confirm which opcodes it
// is meant to select.
327 def : InstRW<[R52WriteALU_EX1],
328 (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
330 def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
331 def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
332 (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
// Compares produce only R52WriteCC (no resource, zero latency). Plain
// register operands are read in EX1; shifted operands and shift amounts are
// read in ISS.
334 def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
335 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
336 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
337 def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
339 def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
340 (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
342 def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
344 def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
345 def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
347 // Integer Load, Multiple.
348 foreach Lat = 3-25 in {
349 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
352 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
357 foreach NAddr = 1-16 in {
358 def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
360 def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
361 def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
362 def R52WriteILDM : SchedWriteVariant<[
363 SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
365 SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
367 SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
368 R52WriteILDM6Cy, R52WriteILDM7Cy]>,
370 SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
371 R52WriteILDM6Cy, R52WriteILDM7Cy,
373 SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
374 R52WriteILDM6Cy, R52WriteILDM7Cy,
375 R52WriteILDM8Cy, R52WriteILDM9Cy]>,
377 SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
378 R52WriteILDM6Cy, R52WriteILDM7Cy,
379 R52WriteILDM8Cy, R52WriteILDM9Cy,
381 SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
382 R52WriteILDM6Cy, R52WriteILDM7Cy,
383 R52WriteILDM8Cy, R52WriteILDM9Cy,
384 R52WriteILDM10Cy, R52WriteILDM11Cy]>,
386 SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
387 R52WriteILDM6Cy, R52WriteILDM7Cy,
388 R52WriteILDM8Cy, R52WriteILDM9Cy,
389 R52WriteILDM10Cy, R52WriteILDM11Cy,
391 SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
392 R52WriteILDM6Cy, R52WriteILDM7Cy,
393 R52WriteILDM8Cy, R52WriteILDM9Cy,
394 R52WriteILDM10Cy, R52WriteILDM11Cy,
395 R52WriteILDM12Cy, R52WriteILDM13Cy]>,
397 SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
398 R52WriteILDM6Cy, R52WriteILDM7Cy,
399 R52WriteILDM8Cy, R52WriteILDM9Cy,
400 R52WriteILDM10Cy, R52WriteILDM11Cy,
401 R52WriteILDM12Cy, R52WriteILDM13Cy,
403 SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
404 R52WriteILDM6Cy, R52WriteILDM7Cy,
405 R52WriteILDM8Cy, R52WriteILDM9Cy,
406 R52WriteILDM10Cy, R52WriteILDM11Cy,
407 R52WriteILDM12Cy, R52WriteILDM13Cy,
408 R52WriteILDM14Cy, R52WriteILDM15Cy]>,
410 SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
411 R52WriteILDM6Cy, R52WriteILDM7Cy,
412 R52WriteILDM8Cy, R52WriteILDM9Cy,
413 R52WriteILDM10Cy, R52WriteILDM11Cy,
414 R52WriteILDM12Cy, R52WriteILDM13Cy,
415 R52WriteILDM14Cy, R52WriteILDM15Cy,
417 SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
418 R52WriteILDM6Cy, R52WriteILDM7Cy,
419 R52WriteILDM8Cy, R52WriteILDM9Cy,
420 R52WriteILDM10Cy, R52WriteILDM11Cy,
421 R52WriteILDM12Cy, R52WriteILDM13Cy,
422 R52WriteILDM14Cy, R52WriteILDM15Cy,
423 R52WriteILDM16Cy, R52WriteILDM17Cy]>,
425 SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
426 R52WriteILDM6Cy, R52WriteILDM7Cy,
427 R52WriteILDM8Cy, R52WriteILDM9Cy,
428 R52WriteILDM10Cy, R52WriteILDM11Cy,
429 R52WriteILDM12Cy, R52WriteILDM13Cy,
430 R52WriteILDM14Cy, R52WriteILDM15Cy,
431 R52WriteILDM16Cy, R52WriteILDM17Cy,
// Sixteen addresses: one write per loaded register, R52WriteILDM4Cy through
// R52WriteILDM19Cy. Guarded by the 16-address predicate so that it is
// distinct from the 15-address variant above.
433 SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
434 R52WriteILDM6Cy, R52WriteILDM7Cy,
435 R52WriteILDM8Cy, R52WriteILDM9Cy,
436 R52WriteILDM10Cy, R52WriteILDM11Cy,
437 R52WriteILDM12Cy, R52WriteILDM13Cy,
438 R52WriteILDM14Cy, R52WriteILDM15Cy,
439 R52WriteILDM16Cy, R52WriteILDM17Cy,
440 R52WriteILDM18Cy, R52WriteILDM19Cy]>,
442 // Unknown number of registers, just use resources for two registers.
// The "CyNo" writes are declared with an empty resource list, while the "Cy"
// writes reserve R52UnitLd.
// NOTE(review): the first two AND the last two writes below are "Cy" forms, so
// four writes reserve the load/store unit, not two - confirm whether
// R52WriteILDM18Cy/R52WriteILDM19Cy should be the CyNo variants.
443 SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
444 R52WriteILDM6CyNo, R52WriteILDM7CyNo,
445 R52WriteILDM8CyNo, R52WriteILDM9CyNo,
446 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
447 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
448 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
449 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
450 R52WriteILDM18Cy, R52WriteILDM19Cy]>
451 ]> { let Variadic=1; }
453 // Integer Store, Multiple
454 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
458 foreach NumAddr = 1-16 in {
459 def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
461 def R52WriteISTM : SchedWriteVariant<[
462 SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
463 SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
464 SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
465 SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
466 SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
467 SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
468 SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
469 SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
470 SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
471 SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
472 SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
473 SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
474 SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
475 SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
476 SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
477 // Unknown number of registers, just use resources for two registers.
478 SchedVar<NoSchedPred, [R52WriteISTM2]>
// Load-multiple without write-back: the variadic R52WriteILDM covers the
// loaded registers; the base register is read in ISS.
481 def : InstRW<[R52WriteILDM, R52Read_ISS],
482 (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
483 "(t|sys)LDM(IA|DA|DB|IB)$")>;
// Write-back (_UPD) forms additionally list R52WriteAdr.
484 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
485 (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Return and pop forms use the same write list as the write-back forms.
486 def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
487 (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
489 // Integer Store, Single Element
490 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
491 (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
492 "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
493 "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
495 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
496 (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
497 "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
498 "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
499 "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
501 // Integer Store, Dual
502 def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
503 (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
504 def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
505 (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
507 def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
508 (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
509 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
510 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
513 // LDRLIT pseudo instructions, they expand to LDR + PICADD
514 def : InstRW<[R52WriteLd],
515 (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
516 // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
517 def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
521 //===----------------------------------------------------------------------===//
522 // VFP, Floating Point Support
// Patterns ending in fd/hd (and the scalar D|S|H forms) use a single-unit FP
// ALU write; patterns ending in fq/hq use the R52Write2FPALU_* writes, which
// list R52UnitFPALU twice. Source operands are read in F1.
523 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
524 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
526 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
527 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
528 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
// Absolute compares use the F3 writes rather than the F5 ones.
530 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
531 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
533 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
534 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
// FP load/store: both reserve the load/store unit (latency 5); the first
// operand is read in ISS and the second in F1.
536 def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
537 def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
540 //===----------------------------------------------------------------------===//
543 // vector multiple load stores
544 foreach NumAddr = 1-16 in {
545 def R52LMAddrPred#NumAddr :
546 SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
548 foreach Lat = 1-32 in {
549 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
553 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
554 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
556 let NumMicroOps = Num;
557 let ResourceCycles = [Num];
560 def R52WriteVLDM : SchedWriteVariant<[
562 SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
564 SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
568 SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
570 SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
574 SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
577 SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
582 SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
583 R52WriteLM7Cy, R52WriteLM8Cy,
585 SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
586 R52WriteLM7Cy, R52WriteLM8Cy,
590 SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
591 R52WriteLM7Cy, R52WriteLM8Cy,
594 SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
595 R52WriteLM7Cy, R52WriteLM8Cy,
600 SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
601 R52WriteLM7Cy, R52WriteLM8Cy,
602 R52WriteLM9Cy, R52WriteLM10Cy,
604 SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
605 R52WriteLM7Cy, R52WriteLM8Cy,
606 R52WriteLM9Cy, R52WriteLM10Cy,
610 SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
611 R52WriteLM7Cy, R52WriteLM8Cy,
612 R52WriteLM9Cy, R52WriteLM10Cy,
615 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
616 R52WriteLM7Cy, R52WriteLM8Cy,
617 R52WriteLM9Cy, R52WriteLM10Cy,
622 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
623 R52WriteLM7Cy, R52WriteLM8Cy,
624 R52WriteLM9Cy, R52WriteLM10Cy,
625 R52WriteLM11Cy, R52WriteLM12Cy,
627 SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
628 R52WriteLM7Cy, R52WriteLM8Cy,
629 R52WriteLM9Cy, R52WriteLM10Cy,
630 R52WriteLM11Cy, R52WriteLM12Cy,
632 // unknown number of reg.
633 SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
634 R52WriteLM7Cy, R52WriteLM8Cy,
635 R52WriteLM9Cy, R52WriteLM10Cy,
636 R52WriteLM11Cy, R52WriteLM12Cy,
638 ]> { let Variadic=1;}
640 // variable stores. Cannot dual-issue
641 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
644 let ResourceCycles = [1];
646 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
649 let ResourceCycles = [2];
651 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
654 let ResourceCycles = [3];
656 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
659 let ResourceCycles = [4];
661 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
663 let NumMicroOps = 10;
664 let ResourceCycles = [5];
666 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
668 let NumMicroOps = 12;
669 let ResourceCycles = [6];
671 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
673 let NumMicroOps = 14;
674 let ResourceCycles = [7];
676 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
678 let NumMicroOps = 16;
679 let ResourceCycles = [8];
681 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
683 let NumMicroOps = 18;
684 let ResourceCycles = [9];
686 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
688 let NumMicroOps = 20;
689 let ResourceCycles = [10];
691 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
693 let NumMicroOps = 22;
694 let ResourceCycles = [11];
697 def R52WriteSTM : SchedWriteVariant<[
698 SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
699 SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
700 SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
701 SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
702 SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
703 SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
704 SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
705 SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
706 SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
707 SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
708 SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
709 SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
710 SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
711 SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
712 SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
713 SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
714 // unknown number of registers, just use resources for two
715 SchedVar<NoSchedPred, [R52WriteSTM6]>
718 // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
719 // another instruction in slot-1, but only in the last issue.
720 def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
721 def : WriteRes<WriteVLD2, [R52UnitLd]> {
724 let ResourceCycles = [2];
727 def : WriteRes<WriteVLD3, [R52UnitLd]> {
730 let ResourceCycles = [3];
733 def : WriteRes<WriteVLD4, [R52UnitLd]> {
736 let ResourceCycles = [4];
739 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
742 let ResourceCycles = [1];
744 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
747 let ResourceCycles = [2];
749 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
752 let ResourceCycles = [3];
754 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
757 let ResourceCycles = [4];
759 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
762 let ResourceCycles = [5];
766 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
767 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
768 def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
770 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
771 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
772 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
774 def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
776 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
777 (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
778 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
779 (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
780 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
781 (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
783 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
784 (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
786 def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
787 def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
// Bitwise clear with immediate: 64-bit vector types use one FP ALU, 128-bit
// vector types use two. "VBICi(v8i16|v4i32)" is mapped exactly once.
789 def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
790 def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
// Bitwise insert/select: first operand read in F1, the other two in F2.
792 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
793 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
797 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
798 (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
799 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
800 (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
802 def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
803 def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
804 def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
805 def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
807 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
808 def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
810 def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
811 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
813 def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
814 def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
// VLDM[SD](IA|DB): only a write type (R52WriteVLDM) is listed; the trailing
// "$" anchors the match at the end of the opcode name.
816 def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
// VMAX/VMIN/VPMAX/VPMIN: R52WriteFPALU_F4 with two R52Read_F1 reads.
817 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
// VORR/VORN/VREV: R52WriteFPALU_F3 with two R52Read_F1 reads.
818 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
// VMRS: only R52WriteNoRSRC_WRI is listed, with no reads.
819 def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
// VNEG: R52WriteFPALU_F5 with a single R52Read_F1 read.
820 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
// Pairwise adds: VPADDi uses R52WriteFPALU_F4, while VPADAL/VPADDL use
// R52Write2FPALU_F4; both list two R52Read_F1 reads.
821 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
822 def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
// VQABS: single R52Read_F1 read. The v8i8/v4i16/v2i32/v1i64 types use
// R52WriteFPALU_F5; the v16i8/v8i16/v4i32/v2i64 types use R52Write2FPALU_F5.
823 def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
824 def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
// VQADD/VQSUB, unsigned ("u") or signed ("s"): two R52Read_F2 reads. The
// v8i8/v4i16/v2i32/v1i64 types use R52WriteFPALU_F5; the
// v16i8/v8i16/v4i32/v2i64 types use R52Write2FPALU_F5.
825 def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
826 (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
827 def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
828 (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
// VQDMLAL/VQDMLSL: R52Write2FPMAC_F5 with three R52Read_F1 reads.
829 def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
// VQDMUL/VQRDMUL: R52WriteFPMUL_F5 with three R52Read_F1 reads.
830 def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
// VQMOVN/VQNEG/VQSHL/VQSHRN: R52WriteFPALU_F5 with two R52Read_F1 reads.
831 def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
832 (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
// VRSHL/VRSHR/VRSHRN/VTB*: R52WriteFPALU_F4 with two R52Read_F1 reads.
833 def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
// VSWP/VTRN/VUZP/VZIP: R52WriteFPALU_F3 with two R52Read_F1 reads.
834 def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
837 // VSTx. Vector Stores
839 // 1-element structure store
// VST1 without writeback. Every entry lists R52Read_ISS and R52Read_F2 as
// reads; only the write type varies with the matched form:
//   VST1d -> R52WriteVST1Mem, VST1q -> R52WriteVST2Mem,
//   "T" suffix -> R52WriteVST3Mem, "Q" suffix -> R52WriteVST4Mem.
// All patterns end in "$", so they do not match longer opcode names.
840 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
841 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
842 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
843 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
// The 64-bit T/Q pseudo opcodes get the same write types as the T/Q forms.
844 def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
845 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
// Single-lane (LN) stores, including the Asm and Pseudo spellings, all use
// R52WriteVST1Mem.
847 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
848 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
849 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
// VST1 with writeback. Same write-type assignment as the non-writeback
// patterns, with R52WriteAdr added as a second write ahead of the
// R52Read_ISS / R52Read_F2 reads. These patterns have no trailing "$".
// NOTE(review): presumably the missing "$" is what lets e.g. "...wb" cover
// every opcode name that continues after "wb" — confirm against the opcode
// list.
851 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
852 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
853 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
854 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
855 def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
856 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
// Single-lane (LN) writeback stores (_UPD, WB_..._Asm_ and Pseudo_UPD
// spellings) all use R52WriteVST1Mem plus R52WriteAdr.
858 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
859 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
860 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
862 // 2-element structure store
// VST2 without writeback. Reads are always R52Read_ISS and R52Read_F2.
// The "d"/"b" forms use R52WriteVST2Mem; the "q" forms and their Pseudo
// spellings use R52WriteVST4Mem.
863 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
864 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
865 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
// Single-lane (LN) stores all use R52WriteVST1Mem. The "d" patterns list
// sizes 8/16/32; the "q" patterns list only 16/32.
867 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
868 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
869 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
870 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
871 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
872 def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
// VST2 with writeback. Same write types as the non-writeback VST2 patterns,
// with R52WriteAdr added as a second write. These patterns have no
// trailing "$".
874 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
875 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
876 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
// Single-lane (LN) writeback stores all use R52WriteVST1Mem plus
// R52WriteAdr. The "d" patterns list sizes 8/16/32; the "q" patterns list
// only 16/32.
878 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
879 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
880 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
881 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
882 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
883 def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
885 // 3-element structure store
// VST3 without writeback. Reads are always R52Read_ISS and R52Read_F2.
// Whole-structure forms (plain, Asm and Pseudo spellings) use
// R52WriteVST4Mem.
886 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
887 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pseudo pattern only matches names starting "VST3d",
// whereas the corresponding writeback pattern uses "VST3(d|q)". Confirm
// whether any "VST3q...Pseudo" opcode without writeback should be covered
// here as well.
888 def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
// Single-lane (LN) stores all use R52WriteVST2Mem. The "d" patterns list
// sizes 8/16/32; the "q" patterns list only 16/32.
890 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
891 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
892 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
893 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
894 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
895 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
// VST3 with writeback. Same write types as the non-writeback VST3 patterns,
// with R52WriteAdr added as a second write. Unlike the VST1/VST2/VST4
// writeback patterns, most of these end in "$"; the "VST3LNdWB_..." pattern
// is the one that does not.
897 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
898 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
899 def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// Single-lane (LN) writeback stores all use R52WriteVST2Mem plus
// R52WriteAdr.
901 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
902 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
903 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
904 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
905 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
906 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
908 // 4-element structure store
// VST4 without writeback. Reads are always R52Read_ISS and R52Read_F2.
// Whole-structure forms (plain, Asm and Pseudo spellings) use
// R52WriteVST5Mem.
909 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
910 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// NOTE(review): this pseudo pattern only matches "VST4d...Pseudo", whereas
// the corresponding writeback pattern uses "VST4(d|q)...(oddP|P)seudo_UPD".
// Confirm whether any "q"/"oddPseudo" opcode without writeback should be
// covered here as well.
911 def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
// Single-lane (LN) stores all use R52WriteVST2Mem. The "d" patterns list
// sizes 8/16/32; the "q" patterns list only 16/32.
913 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
914 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
915 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
916 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
917 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
918 def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
// VST4 with writeback. Same write types as the non-writeback VST4 patterns,
// with R52WriteAdr added as a second write. These patterns have no
// trailing "$".
920 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
921 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
922 def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
// Single-lane (LN) writeback stores all use R52WriteVST2Mem plus
// R52WriteAdr. The "d" patterns list sizes 8/16/32; the "q" patterns list
// only 16/32.
924 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
925 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
926 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
927 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
928 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
929 def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;