1 //==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
12 //===----------------------------------------------------------------------===//
14 // ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
17 // There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
18 // A number of forwarding paths enable results of computations to be input
19 // to subsequent operations before they are written to registers.
20 // This scheduler is a MachineScheduler. See TargetSchedule.td for details.
// Machine model for the Cortex-R52. The WriteRes / ReadAdvance / InstRW records
// later in this file are bound to it through `let SchedModel = CortexR52Model`.
def CortexR52Model : SchedMachineModel {
  let MicroOpBufferSize = 0;  // R52 is an in-order processor
  let IssueWidth        = 2;  // 2 micro-ops dispatched per cycle
  let LoadLatency       = 1;  // Optimistic, assuming no misses
  let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
  let CompleteModel     = 0;  // Covers instructions applicable to cortex-r52.
}
31 //===----------------------------------------------------------------------===//
32 // Define each kind of processor resource and number available.
34 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
35 // Cortex-R52 is an in-order processor.
// One ProcResource per pipeline; the template argument is the number of
// units. BufferSize = 0 is applied to every unit because the core is in-order.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>;  // Int ALU
  def R52UnitMAC   : ProcResource<1>;  // Int MAC
  def R52UnitDiv   : ProcResource<1>;  // Int Division
  def R52UnitLd    : ProcResource<1>;  // Load/Store
  def R52UnitB     : ProcResource<1>;  // Branch
  def R52UnitFPALU : ProcResource<2>;  // FP ALU
  def R52UnitFPMUL : ProcResource<2>;  // FP MUL
  def R52UnitFPDIV : ProcResource<1>;  // FP DIV
}
// Cortex-R52 specific SchedReads, one per pipeline stage in which an operand
// may be read.

// Integer pipeline.
def R52Read_ISS : SchedRead;
def R52Read_EX1 : SchedRead;
def R52Read_EX2 : SchedRead;
def R52Read_WRI : SchedRead;

// Floating-point pipeline. F0 maps to ISS stage of integer pipe.
def R52Read_F0  : SchedRead;
def R52Read_F1  : SchedRead;
def R52Read_F2  : SchedRead;
56 //===----------------------------------------------------------------------===//
57 // Subtarget-specific SchedWrite types which map ProcResources and set latency.
59 let SchedModel = CortexR52Model in {
// ALU - Write occurs in Late EX2 (independent of whether shift was required)
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}

// Compares: occupy an ALU, modelled with zero result latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
72 // Multiply - aliased to sub-target specific later
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
// The divider is held for the full latency because it is not pipelined.
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8];  // non-pipelined
}
// Branches - LR written in Late EX2
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}

// No-op: consumes no resource, has no latency and issues no micro-ops.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
// Integer pipeline by-passes
def : ReadAdvance<ReadALU,   1>;  // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>;  // Shift operands needed in ISS
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;

// Floating-point. Map target-defined SchedReadWrites to subtarget
def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> {
  let Latency = 6;
}
96 def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
// Single-precision FP multiply-accumulate: uses one FP MUL and one FP ALU.
def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
  let Latency = 11;  // as it is internally two insns (MUL then ADD)
}
104 def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
105 R52UnitFPALU, R52UnitFPALU]> {
// Single-precision FP divide. Modelled on R52UnitDiv, which is held for the
// whole operation.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;           // FP div takes fixed #cycles
  let ResourceCycles = [7];  // is not pipelined
}
114 def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
116 let ResourceCycles = [17];
// FP square root, modelled on R52UnitDiv.
// NOTE(review): unlike WriteFPDIV32/64 these set no ResourceCycles - confirm
// whether square root should also hold the unit for its full latency.
let Latency = 7 in
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]>;
let Latency = 17 in
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]>;
// Overridden via InstRW for this processor; the generic vector-store writes
// therefore map to no resources here.
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

// Floating-point by-passes
def : ReadAdvance<ReadFPMUL, 1>;  // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>;  // fp-mac operand read in F1
131 //===----------------------------------------------------------------------===//
132 // Subtarget-specific SchedReadWrites.
// Forwarding information - based on when an operand is read.
// The advance is the number of cycles after issue at which the operand is
// consumed.

// Integer pipeline stages
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;

// Floating-point pipeline stages
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;
// Cortex-R52 specific SchedWrites for use with InstRW

// Integer multiply / multiply-accumulate result.
def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4;
}

// High half of a 64-bit multiply result: same latency, but contributes no
// additional micro-op.
def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4;
  let NumMicroOps = 0;
}

// Integer divide: the divider is held for the full latency.
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8];  // not pipelined
}
// Load / store on the single load-store unit.
let Latency = 4 in def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
let Latency = 4 in def R52WriteST : SchedWriteRes<[R52UnitLd]>;

// Address write-back and condition-code writes: no resource, no latency.
let Latency = 0 in def R52WriteAdr : SchedWriteRes<[]>;
let Latency = 0 in def R52WriteCC  : SchedWriteRes<[]>;

// ALU results, named after the stage in which the result is produced.
let Latency = 2 in def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]>;
let Latency = 3 in def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]>;
let Latency = 4 in def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]>;

// Same latencies as the EX2 / WRI ALU writes, but reserving no resource.
let Latency = 3 in def R52WriteNoRSRC_EX2 : SchedWriteRes<[]>;
let Latency = 4 in def R52WriteNoRSRC_WRI : SchedWriteRes<[]>;
// Alias generics to sub-target specific

// Multiplies
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;

// Multiply-accumulates
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;

// Memory accesses (preloads are modelled as loads)
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
// FP ALU result with a 4-cycle latency, using one FP ALU.
def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 4;
}
176 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
// FP ALU result with a 5-cycle latency, using one FP ALU.
def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 5;
}
180 def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
// FP ALU result with a 6-cycle latency, using one FP ALU.
def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> {
  let Latency = 6;
}
184 def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
// FP multiply result with a 6-cycle latency, using one FP MUL unit.
def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> {
  let Latency = 6;
}
188 def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
// FP multiply-accumulate: uses one FP MUL and one FP ALU.
def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
  let Latency = 11;  // as it is internally two insns (MUL then ADD)
}
194 def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
195 R52UnitFPALU, R52UnitFPALU]> {
// FP load / store through the load-store unit, 5-cycle latency.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}
202 //===----------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrites to processor specific ones
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
210 //===----------------------------------------------------------------------===//
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
// In each list the first entries are the writes (results) and the remaining
// R52Read_* entries give the stage in which each source operand is read.

def : InstRW<[WriteALU],
             (instrs COPY)>;

// Sign/zero extends
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
                        "t2SXTB", "t2SXTH", "t2SXTB16",
                        "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate moves
def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;

// Global-address pseudo moves
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
             (instregex "MOV_ga_pcrel_ldr")>;

// Select
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SEL", "t2SEL")>;

// Bit-field operations
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
             (instregex "BFC", "BFI", "UBFX", "SBFX",
                        "(t|t2)BFC", "(t|t2)BFI",
                        "(t|t2)UBFX", "(t|t2)SBFX")>;
// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
             (instregex "QADD", "QSUB", "QDADD", "QDSUB",
                        "SSAT", "SSAT16", "USAT",
                        "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
                        "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX",
                        "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB",
                        "t2SSAT", "t2SSAT16", "t2USAT",
                        "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16",
                        "t2QASX", "t2QSAX",
                        "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16",
                        "t2UQASX","t2UQSAX","t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
                        "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
                        "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16",
                        "t2SASX", "t2SSAX",
                        "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16",
                        "t2UASX", "t2USAX")>;

// Halving parallel arithmetic and extend-and-add forms
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16",
                        "SHASX", "SHSAX",
                        "SXTAB", "SXTAB16", "SXTAH",
                        "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
                        "UHASX", "UHSAX",
                        "UXTAB", "UXTAB16", "UXTAH",
                        "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16",
                        "t2SHASX", "t2SHSAX",
                        "t2SXTAB", "t2SXTAB16", "t2SXTAH",
                        "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16",
                        "t2UHASX", "t2UHSAX",
                        "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;
// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
             (instregex "MUL", "SMMUL", "SMMULR",
                        "SMULBB", "SMULBT", "SMULTB", "SMULTT",
                        "SMULWB", "SMULWT", "SMUSD", "SMUSDX",
                        "t2MUL", "t2SMMUL", "t2SMMULR",
                        "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
                        "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
             (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
                        "t2MLA", "t2MLS",
                        "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
                        "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
                        "SMLABB", "SMLABT", "SMLATB", "SMLATT",
                        "SMLSD", "SMLSDX", "SMLAWB", "SMLAWT",
                        "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
                        "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
                        "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
                        "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
                        "SMLAL", "UMLAL", "SMLALBT",
                        "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX",
                        "SMLSLD", "SMLSLDX",
                        "UMAAL", "t2SMLAL", "t2UMLAL",
                        "t2SMLALBT", "t2SMLALTB", "t2SMLALTT",
                        "t2SMLALD", "t2SMLALDX",
                        "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer Divide
// NOTE(review): only the Thumb2 opcodes are listed here - confirm whether the
// ARM-mode SDIV/UDIV opcodes should be covered as well.
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
             (instregex "t2SDIV", "t2UDIV")>;
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform

// Loads without base write-back.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
             (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$",
                        "t2LDR(i8|i12|s|pci)",
                        "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
                        "tLDR[BH](r|i|spi|pci|pciASM)",
                        "tLDR(r|i|spi|pci|pciASM)",
                        "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
                        "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)",
                        "LDRS(H|B)$",
                        "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
                        "LDA", "t2LDA")>;

// Pre/post-indexed and translated loads: R52WriteAdr is the extra write for
// the updated base register.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
             (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)",
                        "LDRH(_PRE|_POST)",
                        "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)",
                        "LDRHT(i|r)",
                        "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
                        "LDR(SH|SB)(_POST|_PRE)",
                        "t2LDR(SH|SB)(_POST|_PRE)",
                        "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
                        "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
// Register-shifted moves and MOVT
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2],
             (instregex "MOVT", "t2MOVT")>;

// ALU, register-immediate forms (one register source)
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
             (instregex "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ", "EORri",
                        "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
                        "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ",
                        "t2EORri", "t2MVN",
                        "t2ORRri", "t2RSBS?ri", "t2SBCri")>;

// ALU, register-register forms (two register sources)
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
             (instregex "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC", "EORrr",
                        "ORRrr", "RSBrr", "RSCrr", "SBCrr",
                        "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC",
                        "t2EORrr", "t2SBCrr")>;

// ALU, shifted-by-immediate forms (shifted operand read in ISS)
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
             (instregex "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi", "EORrsi",
                        "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
                        "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs",
                        "t2ORRrs", "t2RSBrs", "t2SBCrs")>;

// ALU, shifted-by-register forms (shifted operand and amount read in ISS)
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr",
                        "MVNS?sr",
                        "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;

// Results available from EX1
def : InstRW<[R52WriteALU_EX1],
             (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
             (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
// Compares: only the condition codes are written (R52WriteCC).
def : InstRW<[R52WriteCC, R52Read_EX1],
             (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
             (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
             (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
             (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal and related single-source operations
def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
             (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS],
             (instregex "TST")>;

// Status-register moves are modelled with the load write.
def : InstRW<[R52WriteLd],
             (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1],
             (instregex "MSR", "MSRbanked")>;
346 // Integer Load, Multiple.
347 foreach Lat = 3-25 in {
348 def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
351 def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
// R52ILDMAddr<N>Pred (N = 1..16) holds when the load/store-multiple
// instruction accesses exactly N addresses, as reported by
// TII->getNumLDMAddresses.
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred :
    SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}
// Address writes for integer load-multiple, without and with base write-back.
// The write-back form keeps the default SchedWriteRes latency.
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> {
  let Latency = 0;
}
def R52WriteILDMAddrWB   : SchedWriteRes<[R52UnitLd]>;
// Integer load-multiple write variant, selected by the number of addresses
// the instruction accesses (R52ILDMAddr<N>Pred). An N-address LDM is given N
// writes, R52WriteILDM4Cy through R52WriteILDM<N+3>Cy.
//
// The 16-address entry must test R52ILDMAddr16Pred; testing the 15-address
// predicate a second time would make that entry unreachable.
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy,
                               R52WriteILDM18Cy]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                               R52WriteILDM6Cy,  R52WriteILDM7Cy,
                               R52WriteILDM8Cy,  R52WriteILDM9Cy,
                               R52WriteILDM10Cy, R52WriteILDM11Cy,
                               R52WriteILDM12Cy, R52WriteILDM13Cy,
                               R52WriteILDM14Cy, R52WriteILDM15Cy,
                               R52WriteILDM16Cy, R52WriteILDM17Cy,
                               R52WriteILDM18Cy, R52WriteILDM19Cy]>,

  // Unknown number of registers, just use resources for two registers.
  // NOTE(review): the last two entries are the resource-using ...18Cy/...19Cy
  // rather than the ...CyNo forms used for 6-17 - confirm this is intended.
  SchedVar<NoSchedPred,       [R52WriteILDM4Cy,    R52WriteILDM5Cy,
                               R52WriteILDM6CyNo,  R52WriteILDM7CyNo,
                               R52WriteILDM8CyNo,  R52WriteILDM9CyNo,
                               R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                               R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                               R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                               R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                               R52WriteILDM18Cy,   R52WriteILDM19Cy]>
]> { let Variadic=1; }
452 // Integer Store, Multiple
453 def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
// R52WriteISTM<N> (N = 1..16) is a sequence of N R52WriteIStIncAddr writes.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}
// Integer store-multiple write variant: picks the R52WriteISTM<N> sequence
// that matches the number of addresses the instruction accesses
// (R52ILDMAddr<N>Pred).
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
// Integer load-multiple without base write-back.
def : InstRW<[R52WriteILDM, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
                        "(t|sys)LDM(IA|DA|DB|IB)$")>;

// Integer load-multiple with base write-back (extra R52WriteAdr).
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDM(IA|DA|DB|IB)_UPD",
                        "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;

// Return-via-LDM forms and Thumb POP.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
             (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
// Integer Store, Single Element
// Address operand is read in ISS, the stored data in EX2.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$",
                        "STREX", "SRS", "t2SRS",
                        "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD",
                        "t2STREXH", "t2STR(i12|i8|s)$",
                        "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$",
                        "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Single-element stores with base write-back (extra R52WriteAdr).
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
                        "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
                        "STR(H_|HT_)(PRE|POST)",
                        "STR(BT|HT|T)", "t2STR_(PRE|POST)",
                        "t2STR[BH]_(PRE|POST)",
                        "t2STR_preidx", "t2STR[BH]_preidx",
                        "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
             (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
             (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
// Integer store-multiple without base write-back.
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
             (instregex "STM(IB|IA|DB|DA)$",
                        "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
508 def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
509 (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
             (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;

// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd],
             (instregex "LDRLIT_ga_pcrel_ldr")>;
520 //===----------------------------------------------------------------------===//
// VFP, Floating Point Support
// The R52Write2* writes are used for the q-register (fq|hq) forms.

// Absolute difference
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VABD(fq|hq)")>;

// Absolute value
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VABS(fq|hq)")>;

// Absolute compares
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add / subtract
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register load / store
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
             (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
             (instregex "VSTR")>;
539 //===----------------------------------------------------------------------===//
542 // vector multiple load stores
// R52LMAddrPred<N> (N = 1..16) holds when the instruction has exactly N
// operands (MI->getNumOperands()).
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr :
    SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}
547 foreach Lat = 1-32 in {
548 def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
552 foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
553 def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
555 let NumMicroOps = Num;
556 let ResourceCycles = [Num];
559 def R52WriteVLDM : SchedWriteVariant<[
561 SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
563 SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
567 SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
569 SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
573 SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
576 SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
581 SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
582 R52WriteLM7Cy, R52WriteLM8Cy,
584 SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
585 R52WriteLM7Cy, R52WriteLM8Cy,
589 SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
590 R52WriteLM7Cy, R52WriteLM8Cy,
593 SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
594 R52WriteLM7Cy, R52WriteLM8Cy,
599 SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
600 R52WriteLM7Cy, R52WriteLM8Cy,
601 R52WriteLM9Cy, R52WriteLM10Cy,
603 SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
604 R52WriteLM7Cy, R52WriteLM8Cy,
605 R52WriteLM9Cy, R52WriteLM10Cy,
609 SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
610 R52WriteLM7Cy, R52WriteLM8Cy,
611 R52WriteLM9Cy, R52WriteLM10Cy,
614 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
615 R52WriteLM7Cy, R52WriteLM8Cy,
616 R52WriteLM9Cy, R52WriteLM10Cy,
621 SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
622 R52WriteLM7Cy, R52WriteLM8Cy,
623 R52WriteLM9Cy, R52WriteLM10Cy,
624 R52WriteLM11Cy, R52WriteLM12Cy,
626 SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
627 R52WriteLM7Cy, R52WriteLM8Cy,
628 R52WriteLM9Cy, R52WriteLM10Cy,
629 R52WriteLM11Cy, R52WriteLM12Cy,
631 // unknown number of reg.
632 SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
633 R52WriteLM7Cy, R52WriteLM8Cy,
634 R52WriteLM9Cy, R52WriteLM10Cy,
635 R52WriteLM11Cy, R52WriteLM12Cy,
637 ]> { let Variadic=1;}
639 // variable stores. Cannot dual-issue
640 def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
643 let ResourceCycles = [1];
645 def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
648 let ResourceCycles = [2];
650 def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
653 let ResourceCycles = [3];
655 def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
658 let ResourceCycles = [4];
660 def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
662 let NumMicroOps = 10;
663 let ResourceCycles = [5];
665 def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
667 let NumMicroOps = 12;
668 let ResourceCycles = [6];
670 def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
672 let NumMicroOps = 14;
673 let ResourceCycles = [7];
675 def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
677 let NumMicroOps = 16;
678 let ResourceCycles = [8];
680 def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
682 let NumMicroOps = 18;
683 let ResourceCycles = [9];
685 def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
687 let NumMicroOps = 20;
688 let ResourceCycles = [10];
690 def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
692 let NumMicroOps = 22;
693 let ResourceCycles = [11];
// Variable-length store write variant, selected by operand count
// (R52LMAddrPred<N>). Each consecutive pair of operand counts shares one
// R52WriteSTM<k> write.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
def : WriteRes<WriteVLD1, [R52UnitLd]> {
  let Latency = 5;
}
720 def : WriteRes<WriteVLD2, [R52UnitLd]> {
723 let ResourceCycles = [2];
726 def : WriteRes<WriteVLD3, [R52UnitLd]> {
729 let ResourceCycles = [3];
732 def : WriteRes<WriteVLD4, [R52UnitLd]> {
735 let ResourceCycles = [4];
738 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
741 let ResourceCycles = [1];
743 def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
746 let ResourceCycles = [2];
748 def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
751 let ResourceCycles = [3];
753 def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
756 let ResourceCycles = [4];
758 def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
761 let ResourceCycles = [5];
// NEON overrides, VABA .. VEXT.
// The R52Write2* writes are used for the wider (q-register / long) forms.

// Absolute difference and accumulate
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

// Absolute difference
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;

// Absolute value
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;

// Integer add / subtract
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long / wide add and subtract
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

// Bitwise logic
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise insert / select
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;

// Compares, count leading zeros, population count
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

// Conversions
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// Duplicate from core register (source read in ISS) and from lane
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// Extract and select
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;
// NEON overrides, VFMA .. VZIP.

// Fused multiply-accumulate
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add / subtract
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;

// FP load-multiple
def : InstRW<[R52WriteVLDM],
             (instregex "VLDM[SD](IA|DB)$")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI],
             (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG")>;

// Pairwise add
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;

// Saturating operations
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;

// Rounding shifts and table lookup
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;

// Permutes
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
// VSTx. Vector Stores

// 1-element structure store
//
// The opcodes anchored with '$' carry no R52WriteAdr entry.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;

// Single-lane forms.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo$")>;

// The "wb" / "WB" / "_UPD" opcodes additionally list R52WriteAdr as a second
// write (presumably the updated base address).
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;

// Single-lane forms with R52WriteAdr.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
// 2-element structure store
//
// The opcodes anchored with '$' carry no R52WriteAdr entry.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)Pseudo$")>;

// Single-lane forms.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo$")>;

// The "wb" / "WB" / "_UPD" opcodes additionally list R52WriteAdr as a second
// write (presumably the updated base address).
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)PseudoWB")>;

// Single-lane forms with R52WriteAdr.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo_UPD")>;
// 3-element structure store
//
// Multi-structure forms without R52WriteAdr (all regexes are anchored with
// '$' so the _UPD opcodes are not matched here).
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
// The pseudo regex accepts both register classes, mirroring the _UPD pseudo
// entry for VST3. A "d"-only pattern cannot match the
// VST3q(8|16|32)oddPseudo opcodes, which would then fall back to the default
// scheduling class.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
// VST3 single-lane forms; the opcodes anchored with '$' carry no R52WriteAdr
// entry.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo$")>;

// VST3 multi-structure forms that additionally list R52WriteAdr as a second
// write (presumably the updated base address).
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// VST3 single-lane forms with R52WriteAdr.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
// 4-element structure store
//
// Multi-structure forms without R52WriteAdr (all regexes are anchored with
// '$' so the _UPD opcodes are not matched here).
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
// The pseudo regex accepts both register classes and the "odd" pseudos,
// mirroring the _UPD pseudo entry for VST4. A "d"-only "Pseudo$" pattern
// cannot match the VST4q(8|16|32)oddPseudo opcodes, which would then fall
// back to the default scheduling class.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
// VST4 single-lane forms; the opcodes anchored with '$' carry no R52WriteAdr
// entry.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo$")>;

// VST4 multi-structure forms that additionally list R52WriteAdr as a second
// write (presumably the updated base address).
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// VST4 single-lane forms with R52WriteAdr.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo_UPD")>;