1 //=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the machine model for Samsung Exynos-M1 to support
11 // instruction scheduling and other instruction cost heuristics.
13 //===----------------------------------------------------------------------===//
15 //===----------------------------------------------------------------------===//
16 // The Exynos-M1 is a traditional superscalar microprocessor with a
17 // 4-wide in-order stage for decode and dispatch and a wider issue stage.
18 // The execution units and loads and stores are out-of-order.
20 def ExynosM1Model : SchedMachineModel {
21 let IssueWidth = 4; // Up to 4 uops per cycle.
22 let MicroOpBufferSize = 96; // ROB size.
23 let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
24 let LoadLatency = 4; // Optimistic load cases.
25 let MispredictPenalty = 14; // Minimum branch misprediction penalty.
26 let CompleteModel = 1; // Use the default model otherwise.
29 //===----------------------------------------------------------------------===//
30 // Define each kind of processor resource and number available on the Exynos-M1,
31 // which has 9 pipelines, each with its own queue with out-of-order dispatch.
33 def M1UnitA : ProcResource<2>; // Simple integer
34 def M1UnitC : ProcResource<1>; // Simple and complex integer
35 def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
36 def M1UnitB : ProcResource<2>; // Branch
37 def M1UnitL : ProcResource<1>; // Load
38 def M1UnitS : ProcResource<1>; // Store
39 def M1PipeF0 : ProcResource<1>; // FP #0
40 let Super = M1PipeF0 in {
41 def M1UnitFMAC : ProcResource<1>; // FP multiplication
42 def M1UnitNAL0 : ProcResource<1>; // Simple vector
43 def M1UnitNMISC : ProcResource<1>; // Miscellanea
44 def M1UnitFCVT : ProcResource<1>; // FP conversion
45 def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
47 def M1PipeF1 : ProcResource<1>; // FP #1
48 let Super = M1PipeF1 in {
49 def M1UnitFADD : ProcResource<1>; // Simple FP
50 def M1UnitNAL1 : ProcResource<1>; // Simple vector
51 def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
52 def M1UnitFST : ProcResource<1>; // FP store
55 let SchedModel = ExynosM1Model in {
56 def M1UnitALU : ProcResGroup<[M1UnitA,
57 M1UnitC]>; // All integer
58 def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
59 M1UnitNAL1]>; // All simple vector
62 let SchedModel = ExynosM1Model in {
64 //===----------------------------------------------------------------------===//
65 // Coarse scheduling model for the Exynos-M1.
67 def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
68 def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
69 def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
70 def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
72 def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
74 def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
75 def M1WriteLX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
77 SchedVar<NoSchedPred, [M1WriteL5]>]>;
79 def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
80 def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; }
81 def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
82 def M1WriteSX : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
84 SchedVar<NoSchedPred, [M1WriteS1]>]>;
// Address-base operand read, resolved per instruction by predicate.
// Both the scaled-index arm (ScaledIdxPred) and the fallback arm
// (NoSchedPred) currently resolve to ReadDefault, so the two cases are
// modeled identically for now.
86 def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
87 SchedVar<NoSchedPred, [ReadDefault]>]>;
// Route the generic ReadAdrBase to the variant defined above.
88 def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
90 // Branch instructions.
91 // NOTE: Unconditional direct branches actually take neither cycles nor units.
92 def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; }
93 def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
95 // Arithmetic and logical integer instructions.
96 def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
97 // TODO: Shift over 3 and some extensions take 2 cycles.
98 def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
99 def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
100 def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
102 // Move instructions.
103 def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
105 // Divide and multiply instructions.
// Integer division: each write lists two resources, M1UnitC and M1UnitD, and
// ResourceCycles gives one entry per resource in the same order. M1UnitC is
// held for 1 cycle, while M1UnitD is held for as many cycles as the latency
// (13 for the 32-bit form, 21 for the 64-bit form).
106 def : WriteRes<WriteID32, [M1UnitC,
107 M1UnitD]> { let Latency = 13;
108 let ResourceCycles = [1, 13]; }
109 def : WriteRes<WriteID64, [M1UnitC,
110 M1UnitD]> { let Latency = 21;
111 let ResourceCycles = [1, 21]; }
112 // TODO: Long multiplication takes 5 cycles and also uses the ALU.
113 // TODO: Multiplication with accumulation can be advanced.
114 def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
115 // TODO: 64-bit multiplication has a throughput of 1/2.
116 def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
118 // Miscellaneous instructions.
119 def : WriteRes<WriteExtr, [M1UnitALU,
120 M1UnitALU]> { let Latency = 2; }
122 // TODO: The latency for the post or pre register is 1 cycle.
123 def : WriteRes<WriteAdr, []> { let Latency = 0; }
125 // Load instructions.
126 def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
127 def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
128 def : SchedAlias<WriteLDIdx, M1WriteLX>;
130 // Store instructions.
131 def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
132 def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
133 def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
134 def : SchedAlias<WriteSTIdx, M1WriteSX>;
136 // FP data instructions.
137 def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
138 // TODO: FCCMP is much different.
139 def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
140 def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
141 let ResourceCycles = [15]; }
142 def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
144 // FP miscellaneous instructions.
145 // TODO: Conversion between register files is much different.
146 def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
147 def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
148 def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
150 // FP load instructions.
151 // TODO: ASIMD loads are much different.
152 def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
154 // FP store instructions.
155 // TODO: ASIMD stores are much different.
156 def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
158 // ASIMD FP instructions.
159 def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
161 // Other miscellaneous instructions.
162 def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
163 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
164 def : WriteRes<WriteHint, []> { let Latency = 1; }
165 def : WriteRes<WriteSys, []> { let Latency = 1; }
167 //===----------------------------------------------------------------------===//
168 // Generic fast forwarding.
170 // TODO: Add FP register forwarding rules.
172 def : ReadAdvance<ReadI, 0>;
173 def : ReadAdvance<ReadISReg, 0>;
174 def : ReadAdvance<ReadIEReg, 0>;
175 def : ReadAdvance<ReadIM, 0>;
176 // Integer multiply-accumulate.
177 // TODO: The forwarding for WriteIM64 actually saves 3 cycles.
178 def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
179 def : ReadAdvance<ReadID, 0>;
180 def : ReadAdvance<ReadExtrHi, 0>;
181 def : ReadAdvance<ReadAdrBase, 0>;
182 def : ReadAdvance<ReadVLD, 0>;
184 //===----------------------------------------------------------------------===//
185 // Finer scheduling model for the Exynos-M1.
187 def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
189 M1UnitFADD]> { let Latency = 9; }
190 def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
191 M1UnitFST]> { let Latency = 5; }
192 def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
193 M1UnitFST]> { let Latency = 6; }
194 def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
196 M1UnitL]> { let Latency = 10; }
197 def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
198 M1UnitFST]> { let Latency = 8; }
199 def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
201 M1UnitL]> { let Latency = 13; }
202 def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
203 M1UnitFST]> { let Latency = 6; }
204 def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
205 M1UnitFST]> { let Latency = 3; }
206 def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
207 M1UnitL]> { let Latency = 9; }
208 def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
209 M1UnitFMAC]> { let Latency = 6; }
210 def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
211 M1UnitFMAC]> { let Latency = 7; }
212 def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
213 def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
214 def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
215 def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
216 def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
// Writes on M1UnitFVAR with 15- and 23-cycle latencies. ResourceCycles equals
// the latency in both cases, i.e. the unit is held for the whole duration of
// the operation.
217 def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
218 let ResourceCycles = [15]; }
219 def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
220 let ResourceCycles = [23]; }
221 def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
222 def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
223 def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
224 def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
225 def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
226 def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
227 def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
228 def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
229 def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
230 def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
231 def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
232 def M1WriteTB : SchedWriteRes<[M1UnitC,
233 M1UnitALU]> { let Latency = 2; }
234 def M1WriteVLDA : SchedWriteRes<[M1UnitL,
235 M1UnitL]> { let Latency = 6; }
236 def M1WriteVLDB : SchedWriteRes<[M1UnitL,
238 M1UnitL]> { let Latency = 7; }
239 def M1WriteVLDC : SchedWriteRes<[M1UnitL,
242 M1UnitL]> { let Latency = 8; }
// Vector load write with a 7-cycle latency that uses M1UnitL and M1UnitNALU.
// ResourceCycles carries one entry per listed resource, in the same order:
// M1UnitL is held for 2 cycles and M1UnitNALU for 1 cycle. The second entry
// is the per-resource default, spelled out so that the cycle list and the
// resource list have the same length.
243 def M1WriteVLDD : SchedWriteRes<[M1UnitL,
244 M1UnitNALU]> { let Latency = 7;
245 let ResourceCycles = [2, 1]; }
246 def M1WriteVLDE : SchedWriteRes<[M1UnitL,
247 M1UnitNALU]> { let Latency = 6; }
// Vector load write with a 10-cycle latency whose resource list names M1UnitL
// twice. ResourceCycles carries one entry per listed resource, in the same
// order: 5 cycles for the first entry and 1 cycle for the second. The second
// entry is the per-resource default, spelled out so that the cycle list and
// the resource list have the same length.
248 def M1WriteVLDF : SchedWriteRes<[M1UnitL,
249 M1UnitL]> { let Latency = 10;
250 let ResourceCycles = [5, 1]; }
251 def M1WriteVLDG : SchedWriteRes<[M1UnitL,
253 M1UnitNALU]> { let Latency = 7;
254 let ResourceCycles = [2]; }
255 def M1WriteVLDH : SchedWriteRes<[M1UnitL,
257 M1UnitNALU]> { let Latency = 6; }
258 def M1WriteVLDI : SchedWriteRes<[M1UnitL,
260 M1UnitL]> { let Latency = 12;
261 let ResourceCycles = [6]; }
262 def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
265 M1UnitNALU]> { let Latency = 9;
266 let ResourceCycles = [4]; }
267 def M1WriteVLDK : SchedWriteRes<[M1UnitL,
271 M1UnitNALU]> { let Latency = 9;
272 let ResourceCycles = [4]; }
273 def M1WriteVLDL : SchedWriteRes<[M1UnitL,
276 M1UnitNALU]> { let Latency = 7;
277 let ResourceCycles = [2]; }
278 def M1WriteVLDM : SchedWriteRes<[M1UnitL,
282 M1UnitNALU]> { let Latency = 7;
283 let ResourceCycles = [2]; }
284 def M1WriteVLDN : SchedWriteRes<[M1UnitL,
287 M1UnitL]> { let Latency = 14;
288 let ResourceCycles = [7]; }
// Vector store writes expressed as the generic WriteVST repeated back to
// back: twice (A), three times (B) and four times (C). They are used below
// for the ST1 Two/Three/Four register forms.
290 def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
291 def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
292 def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
293 def M1WriteVSTD : SchedWriteRes<[M1UnitS,
295 M1UnitFST]> { let Latency = 7;
296 let ResourceCycles = [7]; }
297 def M1WriteVSTE : SchedWriteRes<[M1UnitS,
301 M1UnitFST]> { let Latency = 8;
302 let ResourceCycles = [8]; }
303 def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
309 M1UnitFST]> { let Latency = 15;
310 let ResourceCycles = [15]; }
311 def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
319 M1UnitFST]> { let Latency = 16;
320 let ResourceCycles = [16]; }
321 def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
325 M1UnitFST]> { let Latency = 14;
326 let ResourceCycles = [14]; }
327 def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
337 M1UnitFST]> { let Latency = 17;
338 let ResourceCycles = [17]; }
340 // Branch instructions
341 def : InstRW<[M1WriteB1], (instrs Bcc)>;
342 // NOTE: Direct branch and link adds a B uop.
343 def : InstRW<[M1WriteA1], (instrs BL)>;
344 // NOTE: Indirect branch and link with LR adds an ALU uop.
345 def : InstRW<[M1WriteA1,
346 M1WriteC1], (instrs BLR)>;
347 def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
348 def : InstRW<[M1WriteC1,
349 M1WriteA2], (instregex "^TBN?Z[WX]")>;
351 // Arithmetic and logical integer instructions.
352 def : InstRW<[M1WriteA1], (instrs COPY)>;
354 // Divide and multiply instructions.
356 // Miscellaneous instructions.
358 // Load instructions.
360 // Store instructions.
362 // FP data instructions.
363 def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
364 def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
365 def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
366 def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
367 def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
368 def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
369 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
370 def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
371 def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
372 def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
373 def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
374 def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
375 def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
377 // FP miscellaneous instructions.
378 def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
379 def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
380 def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
381 def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
382 def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
383 def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
385 // FP load instructions.
387 // FP store instructions.
389 // ASIMD instructions.
390 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
391 def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
392 def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
393 def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
394 def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
395 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
396 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
397 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
398 def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
399 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
400 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
401 def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
402 def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
403 def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
404 def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
405 def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
406 def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
407 def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
408 def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
409 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
410 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
411 def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
412 def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
413 def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
414 def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
415 def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;
417 // ASIMD FP instructions.
418 def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
419 def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
420 def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
421 def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
422 def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
423 def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
424 def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
425 def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
426 def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
427 def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
428 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
429 def : InstRW<[M1WriteNEONJ], (instregex "^FMULX?v.i")>;
430 def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v.f")>;
431 def : InstRW<[M1WriteNEONK], (instregex "^FML[AS]v.i")>;
432 def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v.f")>;
433 def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
435 // ASIMD miscellaneous instructions.
436 def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
437 def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
438 def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
439 def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
440 def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
441 def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
442 def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
443 def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>;
444 def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
445 def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
446 def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
// Table lookups (opcodes matching TBL/TBX). The "One" forms take a single
// write: M1WriteNAL11 for the v8i8 variants and M1WriteNAL12 for the v16i8
// variants. The "Two", "Three" and "Four" forms repeat that same write 2, 3
// and 4 times in sequence.
447 def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
448 def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
449 (instregex "^TB[LX]v8i8Two")>;
450 def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
451 (instregex "^TB[LX]v8i8Three")>;
452 def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
453 (instregex "^TB[LX]v8i8Four")>;
454 def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
455 def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
456 (instregex "^TB[LX]v16i8Two")>;
457 def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
458 (instregex "^TB[LX]v16i8Three")>;
459 def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
460 (instregex "^TB[LX]v16i8Four")>;
461 def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
462 def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
463 def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
464 def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
465 def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
467 // ASIMD load instructions.
468 def : InstRW<[M1WriteVLDD], (instregex "LD1i(8|16|32)$")>;
469 def : InstRW<[M1WriteVLDD,
470 WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
471 def : InstRW<[M1WriteVLDE], (instregex "LD1i(64)$")>;
472 def : InstRW<[M1WriteVLDE,
473 WriteAdr], (instregex "LD1i(64)_POST$")>;
475 def : InstRW<[M1WriteL5], (instregex "LD1Rv(8b|4h|2s)$")>;
476 def : InstRW<[M1WriteL5,
477 WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
478 def : InstRW<[M1WriteL5], (instregex "LD1Rv(1d)$")>;
479 def : InstRW<[M1WriteL5,
480 WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
481 def : InstRW<[M1WriteL5], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
482 def : InstRW<[M1WriteL5,
483 WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
485 def : InstRW<[M1WriteL5], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
486 def : InstRW<[M1WriteL5,
487 WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
488 def : InstRW<[M1WriteL5], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
489 def : InstRW<[M1WriteL5,
490 WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
491 def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
492 def : InstRW<[M1WriteVLDA,
493 WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
494 def : InstRW<[M1WriteVLDA], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
495 def : InstRW<[M1WriteVLDA,
496 WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
497 def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
498 def : InstRW<[M1WriteVLDB,
499 WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
500 def : InstRW<[M1WriteVLDB], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
501 def : InstRW<[M1WriteVLDB,
502 WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
503 def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
504 def : InstRW<[M1WriteVLDC,
505 WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
506 def : InstRW<[M1WriteVLDC], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
507 def : InstRW<[M1WriteVLDC,
508 WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
510 def : InstRW<[M1WriteVLDG], (instregex "LD2i(8|16)$")>;
511 def : InstRW<[M1WriteVLDG,
512 WriteAdr], (instregex "LD2i(8|16)_POST$")>;
513 def : InstRW<[M1WriteVLDG], (instregex "LD2i(32)$")>;
514 def : InstRW<[M1WriteVLDG,
515 WriteAdr], (instregex "LD2i(32)_POST$")>;
516 def : InstRW<[M1WriteVLDH], (instregex "LD2i(64)$")>;
517 def : InstRW<[M1WriteVLDH,
518 WriteAdr], (instregex "LD2i(64)_POST$")>;
520 def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(8b|4h|2s)$")>;
521 def : InstRW<[M1WriteVLDA,
522 WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
523 def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(1d)$")>;
524 def : InstRW<[M1WriteVLDA,
525 WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
526 def : InstRW<[M1WriteVLDA], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
527 def : InstRW<[M1WriteVLDA,
528 WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
530 def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(8b|4h|2s)$")>;
531 def : InstRW<[M1WriteVLDF,
532 WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
533 def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(16b|8h|4s)$")>;
534 def : InstRW<[M1WriteVLDF,
535 WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
536 def : InstRW<[M1WriteVLDF], (instregex "LD2Twov(2d)$")>;
537 def : InstRW<[M1WriteVLDF,
538 WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
540 def : InstRW<[M1WriteVLDJ], (instregex "LD3i(8|16)$")>;
541 def : InstRW<[M1WriteVLDJ,
542 WriteAdr], (instregex "LD3i(8|16)_POST$")>;
543 def : InstRW<[M1WriteVLDJ], (instregex "LD3i(32)$")>;
544 def : InstRW<[M1WriteVLDJ,
545 WriteAdr], (instregex "LD3i(32)_POST$")>;
546 def : InstRW<[M1WriteVLDL], (instregex "LD3i(64)$")>;
547 def : InstRW<[M1WriteVLDL,
548 WriteAdr], (instregex "LD3i(64)_POST$")>;
550 def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(8b|4h|2s)$")>;
551 def : InstRW<[M1WriteVLDB,
552 WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
553 def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(1d)$")>;
554 def : InstRW<[M1WriteVLDB,
555 WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
556 def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(16b|8h|4s)$")>;
557 def : InstRW<[M1WriteVLDB,
558 WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
559 def : InstRW<[M1WriteVLDB], (instregex "LD3Rv(2d)$")>;
560 def : InstRW<[M1WriteVLDB,
561 WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
563 def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(8b|4h|2s)$")>;
564 def : InstRW<[M1WriteVLDI,
565 WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
566 def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(16b|8h|4s)$")>;
567 def : InstRW<[M1WriteVLDI,
568 WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
569 def : InstRW<[M1WriteVLDI], (instregex "LD3Threev(2d)$")>;
570 def : InstRW<[M1WriteVLDI,
571 WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
573 def : InstRW<[M1WriteVLDK], (instregex "LD4i(8|16)$")>;
574 def : InstRW<[M1WriteVLDK,
575 WriteAdr], (instregex "LD4i(8|16)_POST$")>;
576 def : InstRW<[M1WriteVLDK], (instregex "LD4i(32)$")>;
577 def : InstRW<[M1WriteVLDK,
578 WriteAdr], (instregex "LD4i(32)_POST$")>;
579 def : InstRW<[M1WriteVLDM], (instregex "LD4i(64)$")>;
580 def : InstRW<[M1WriteVLDM,
581 WriteAdr], (instregex "LD4i(64)_POST$")>;
583 def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(8b|4h|2s)$")>;
584 def : InstRW<[M1WriteVLDC,
585 WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
586 def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(1d)$")>;
587 def : InstRW<[M1WriteVLDC,
588 WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
589 def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(16b|8h|4s)$")>;
590 def : InstRW<[M1WriteVLDC,
591 WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
592 def : InstRW<[M1WriteVLDC], (instregex "LD4Rv(2d)$")>;
593 def : InstRW<[M1WriteVLDC,
594 WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
596 def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(8b|4h|2s)$")>;
597 def : InstRW<[M1WriteVLDN,
598 WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
599 def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(16b|8h|4s)$")>;
600 def : InstRW<[M1WriteVLDN,
601 WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
602 def : InstRW<[M1WriteVLDN], (instregex "LD4Fourv(2d)$")>;
603 def : InstRW<[M1WriteVLDN,
604 WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
606 // ASIMD store instructions.
607 def : InstRW<[M1WriteVSTD], (instregex "ST1i(8|16|32)$")>;
608 def : InstRW<[M1WriteVSTD,
609 WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
610 def : InstRW<[M1WriteVSTD], (instregex "ST1i(64)$")>;
611 def : InstRW<[M1WriteVSTD,
612 WriteAdr], (instregex "ST1i(64)_POST$")>;
614 def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
615 def : InstRW<[WriteVST,
616 WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
617 def : InstRW<[WriteVST], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
618 def : InstRW<[WriteVST,
619 WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
620 def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
621 def : InstRW<[M1WriteVSTA,
622 WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
623 def : InstRW<[M1WriteVSTA], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
624 def : InstRW<[M1WriteVSTA,
625 WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
626 def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
627 def : InstRW<[M1WriteVSTB,
628 WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
629 def : InstRW<[M1WriteVSTB], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
630 def : InstRW<[M1WriteVSTB,
631 WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
632 def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
633 def : InstRW<[M1WriteVSTC,
634 WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
635 def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
636 def : InstRW<[M1WriteVSTC,
637 WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
639 def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>;
640 def : InstRW<[M1WriteVSTD,
641 WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
642 def : InstRW<[M1WriteVSTD], (instregex "ST2i(64)$")>;
643 def : InstRW<[M1WriteVSTD,
644 WriteAdr], (instregex "ST2i(64)_POST$")>;
646 def : InstRW<[M1WriteVSTD], (instregex "ST2Twov(8b|4h|2s)$")>;
647 def : InstRW<[M1WriteVSTD,
648 WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
649 def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(16b|8h|4s)$")>;
650 def : InstRW<[M1WriteVSTE,
651 WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
652 def : InstRW<[M1WriteVSTE], (instregex "ST2Twov(2d)$")>;
653 def : InstRW<[M1WriteVSTE,
654 WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
656 def : InstRW<[M1WriteVSTH], (instregex "ST3i(8|16)$")>;
657 def : InstRW<[M1WriteVSTH,
658 WriteAdr], (instregex "ST3i(8|16)_POST$")>;
659 def : InstRW<[M1WriteVSTH], (instregex "ST3i(32)$")>;
660 def : InstRW<[M1WriteVSTH,
661 WriteAdr], (instregex "ST3i(32)_POST$")>;
662 def : InstRW<[M1WriteVSTF], (instregex "ST3i(64)$")>;
663 def : InstRW<[M1WriteVSTF,
664 WriteAdr], (instregex "ST3i(64)_POST$")>;
666 def : InstRW<[M1WriteVSTF], (instregex "ST3Threev(8b|4h|2s)$")>;
667 def : InstRW<[M1WriteVSTF,
668 WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
669 def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(16b|8h|4s)$")>;
670 def : InstRW<[M1WriteVSTG,
671 WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
672 def : InstRW<[M1WriteVSTG], (instregex "ST3Threev(2d)$")>;
673 def : InstRW<[M1WriteVSTG,
674 WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
676 def : InstRW<[M1WriteVSTH], (instregex "ST4i(8|16)$")>;
677 def : InstRW<[M1WriteVSTH,
678 WriteAdr], (instregex "ST4i(8|16)_POST$")>;
679 def : InstRW<[M1WriteVSTH], (instregex "ST4i(32)$")>;
680 def : InstRW<[M1WriteVSTH,
681 WriteAdr], (instregex "ST4i(32)_POST$")>;
682 def : InstRW<[M1WriteVSTF], (instregex "ST4i(64)$")>;
683 def : InstRW<[M1WriteVSTF,
684 WriteAdr], (instregex "ST4i(64)_POST$")>;
686 def : InstRW<[M1WriteVSTF], (instregex "ST4Fourv(8b|4h|2s)$")>;
687 def : InstRW<[M1WriteVSTF,
688 WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
689 def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(16b|8h|4s)$")>;
690 def : InstRW<[M1WriteVSTI,
691 WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
692 def : InstRW<[M1WriteVSTI], (instregex "ST4Fourv(2d)$")>;
693 def : InstRW<[M1WriteVSTI,
694 WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
696 // Cryptography instructions.
// AES write: one cycle on the cryptographic unit.
697 def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
// Read that is advanced by 1 cycle, but only when the value being read was
// produced by M1WriteAES; this offsets that write's 1-cycle latency.
698 def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
// Opcodes matching ^AES[DE] use the plain AES write.
699 def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
// Opcodes matching ^AESI?MC use the same write and read their operand through
// M1ReadAES, so a result forwarded from a preceding M1WriteAES producer is
// modeled as available one cycle earlier.
700 def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
702 def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
703 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
704 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
705 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
706 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
709 def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
711 } // SchedModel = ExynosM1Model