]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td
Merge llvm, clang, lld, lldb, compiler-rt and libc++ r304149, and update
[FreeBSD/FreeBSD.git] / contrib / llvm / lib / Target / AArch64 / AArch64SchedFalkorDetails.td
1 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the uop and latency details for the machine model for the
11 // Qualcomm Falkor subtarget.
12 //
13 //===----------------------------------------------------------------------===//
14
15 // Contains all of the Falkor specific SchedWriteRes types. The approach
16 // below is to define a generic SchedWriteRes for every combination of
17 // latency and microOps. The naming convention is to use a prefix, one field
18 // for latency, and one or more microOp count/type designators.
19 //   Prefix: FalkorWr
20 //   MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
21 //   Latency: #cyc
22 //
23 // e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
24 //      down one Z pipe, six SD pipes, four VX pipes and the total latency is
25 //      six cycles.
26 //
27 // Contains all of the Falkor specific ReadAdvance types for forwarding logic.
28 //
29 // Contains all of the Falkor specific WriteVariant types for immediate zero
30 // and LSLFast.
31 //===----------------------------------------------------------------------===//
32
33 //===----------------------------------------------------------------------===//
34 // Define 0 micro-op types (no pipe is listed; only the latency differs)
35 let NumMicroOps = 0 in { // shared by every def in this group
36   def FalkorWr_none_1cyc : SchedWriteRes<[]> { let Latency = 1; }
39   def FalkorWr_none_3cyc : SchedWriteRes<[]> { let Latency = 3; }
43   def FalkorWr_none_4cyc : SchedWriteRes<[]> { let Latency = 4; }
45 } // NumMicroOps = 0
46 }
47
48 //===----------------------------------------------------------------------===//
49 // Define 1 micro-op types (each def lists one pipe, or none, and sets only Latency)
50 // NOTE(review): FalkorWr_IMUL32_1X_2cyc is named 2cyc but sets Latency = 4 -- confirm.
51 let Latency = 2 in def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]>;
52 let Latency = 4 in def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]>;
53 let Latency = 4 in def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]>;
54 let Latency = 5 in def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]>;
55 let Latency = 0 in def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]>;
56 let Latency = 0 in def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]>;
57 let Latency = 3 in def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]>;
58 let Latency = 4 in def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]>;
59 let Latency = 1 in def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]>;
60 let Latency = 2 in def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]>;
61 let Latency = 0 in def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>;
62 let Latency = 1 in def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>;
63 let Latency = 0 in def FalkorWr_1none_0cyc : SchedWriteRes<[]>;
64 // Types issued on the VXVY pipe group.
65 let Latency = 1 in def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>;
66 let Latency = 2 in def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>;
67 let Latency = 3 in def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>;
68 let Latency = 4 in def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>;
69 let Latency = 4 in def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>;
70 let Latency = 5 in def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>;
71 let Latency = 5 in def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>;
72 let Latency = 6 in def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>;
73 let Latency = 6 in def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>;
74 // Types issued on the LD and ST pipes.
75 let Latency = 0 in def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]>;
76 let Latency = 0 in def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]>;
77 let Latency = 3 in def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]>;
78 // Types issued on the GTOV and VTOG pipes.
79 let Latency = 1 in def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>;
80 let Latency = 4 in def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>;
81 let Latency = 1 in def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>;
82
83 //===----------------------------------------------------------------------===//
84 // Define 2 micro-op types
85 let NumMicroOps = 2 in { // shared by every def in this group
86   def FalkorWr_2VXVY_1cyc
87       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 1; }
90   def FalkorWr_2VXVY_2cyc
91       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 2; }
94   def FalkorWr_2VXVY_3cyc
95       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; }
98   def FalkorWr_2VXVY_4cyc
99       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
102   def FalkorWr_VMUL32_2VXVY_4cyc
103       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
106   def FalkorWr_2VXVY_5cyc
107       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; }
110   def FalkorWr_FMUL32_2VXVY_5cyc
111       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; }
114   def FalkorWr_2VXVY_6cyc
115       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; }
118   def FalkorWr_FMUL64_2VXVY_6cyc
119       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; }
122
123   def FalkorWr_1LD_1VXVY_4cyc
124       : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { let Latency = 4; }
127   def FalkorWr_1XYZ_1LD_4cyc
128       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { let Latency = 4; }
131   def FalkorWr_2LD_3cyc
132       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
135
136   def FalkorWr_1VX_1VY_5cyc
137       : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 5; }
141   def FalkorWr_1VX_1VY_2cyc
142       : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 2; }
146   def FalkorWr_1VX_1VY_4cyc
147       : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 4; }
151   def FalkorWr_1VX_1VY_10cyc
152       : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 10; }
155
156   def FalkorWr_1GTOV_1VXVY_2cyc
157       : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { let Latency = 2; }
161   def FalkorWr_2GTOV_1cyc
162       : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { let Latency = 1; }
165
166   def FalkorWr_1XYZ_1ST_4cyc
167       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { let Latency = 4; }
170   def FalkorWr_1XYZ_1LD_5cyc
171       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { let Latency = 5; }
175   def FalkorWr_2XYZ_2cyc
176       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { let Latency = 2; }
180   def FalkorWr_1Z_1XY_0cyc
181       : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { let Latency = 0; }
184   // The two types below also set per-resource cycle counts for X and Z.
185   def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
186     let Latency = 8;
188     let ResourceCycles = [2, 8];
189   }
191   def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
192     let Latency = 16;
194     let ResourceCycles = [2, 16];
195   }
196
197   def FalkorWr_1LD_1Z_3cyc
198       : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { let Latency = 3; }
201   // "1none": two micro-ops, but only the LD pipe is listed.
202   def FalkorWr_1LD_1none_3cyc
203       : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
206
207   def FalkorWr_1SD_1ST_0cyc
208       : SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { let Latency = 0; }
212   def FalkorWr_1VSD_1ST_0cyc
213       : SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { let Latency = 0; }
215 } // NumMicroOps = 2
216
217 //===----------------------------------------------------------------------===//
218 // Define 3 micro-op types
219 let NumMicroOps = 3 in { // one ST, one SD and one LD pipe per type
220   def FalkorWr_1ST_1SD_1LD_0cyc
221       : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, FalkorUnitLD]> { let Latency = 0; }
225
226   def FalkorWr_1ST_1SD_1LD_3cyc
227       : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, FalkorUnitLD]> { let Latency = 3; }
230 } // NumMicroOps = 3
231 // 3VXVY types: FalkorUnitVXVY is listed once per micro-op (three entries), as the 4VXVY/5VXVY types do.
232 def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
233   let Latency = 3;
234   let NumMicroOps = 3;
235 }
236
237 def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
238   let Latency = 4;
239   let NumMicroOps = 3;
240 }
241
242 def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
243   let Latency = 5;
244   let NumMicroOps = 3;
245 }
246
247 def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]> {
248   let Latency = 6;
249   let NumMicroOps = 3;
250 }
251 // 1LD + 2VXVY: one resource entry per micro-op, matching the name and NumMicroOps = 3.
252 def FalkorWr_1LD_2VXVY_4cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY]> {
253   let Latency = 4;
254   let NumMicroOps = 3;
255 }
256 let NumMicroOps = 3 in { // shared by every def in this group
257   // "1none": three micro-ops, but only two LD pipes are listed.
258   def FalkorWr_2LD_1none_3cyc
259       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
261
262   def FalkorWr_3LD_3cyc
263       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
267
268   def FalkorWr_2LD_1Z_3cyc
269       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitZ]> { let Latency = 3; }
273
274   def FalkorWr_1XYZ_1SD_1ST_0cyc
275       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { let Latency = 0; }
278   def FalkorWr_1XYZ_1VSD_1ST_0cyc
279       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { let Latency = 0; }
281 } // NumMicroOps = 3
282 //===----------------------------------------------------------------------===//
283 // Define 4 micro-op types
284 let NumMicroOps = 4 in { // shared by every def in this group
285   def FalkorWr_2VX_2VY_2cyc
286       : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, FalkorUnitVX, FalkorUnitVY]>
287     { let Latency = 2; }
290
291   def FalkorWr_4VXVY_2cyc
292       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]>
293     { let Latency = 2; }
296   def FalkorWr_4VXVY_3cyc
297       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]>
298     { let Latency = 3; }
301   def FalkorWr_4VXVY_4cyc
302       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]>
303     { let Latency = 4; }
306   def FalkorWr_4VXVY_6cyc
307       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]>
308     { let Latency = 6; }
311
312   def FalkorWr_4LD_3cyc
313       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]>
314     { let Latency = 3; }
317
318   def FalkorWr_1LD_3VXVY_4cyc
319       : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY]>
320     { let Latency = 4; }
323   // "2none": four micro-ops, but only two LD pipes are listed.
324   def FalkorWr_2LD_2none_3cyc
325       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
328
329   def FalkorWr_2LD_1ST_1SD_3cyc
330       : SchedWriteRes<[FalkorUnitLD, FalkorUnitST, FalkorUnitSD, FalkorUnitLD]>
331     { let Latency = 3; }
334
335   def FalkorWr_2VSD_2ST_0cyc
336       : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, FalkorUnitST, FalkorUnitVSD]>
337     { let Latency = 0; }
339 } // NumMicroOps = 4
340
341 //===----------------------------------------------------------------------===//
342 // Define 5 micro-op types
343 let NumMicroOps = 5 in { // shared by every def in this group
344   def FalkorWr_1LD_4VXVY_4cyc
345       : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY,
346                        FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
349   // "1none": five micro-ops, but only four pipes are listed.
350   def FalkorWr_2LD_2VXVY_1none_4cyc
351       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
352                        FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
355   def FalkorWr_5VXVY_7cyc
356       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY,
357                        FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 7; }
361   def FalkorWr_1XYZ_2ST_2VSD_0cyc
362       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, FalkorUnitVSD,
363                        FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
367   def FalkorWr_1VXVY_2ST_2VSD_0cyc
368       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
369                        FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
372 } // NumMicroOps = 5
373 //===----------------------------------------------------------------------===//
374 // Define 6 micro-op types
375 let NumMicroOps = 6 in { // shared by every def in this group
376   // "2none": six micro-ops, but only four pipes are listed.
377   def FalkorWr_2LD_2VXVY_2none_4cyc
378       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
379                        FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
381
382   def FalkorWr_2XYZ_2ST_2VSD_0cyc
383       : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, FalkorUnitVSD,
384                        FalkorUnitXYZ, FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
388
389   def FalkorWr_2VXVY_2ST_2VSD_0cyc
390       : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
391                        FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
395
396   def FalkorWr_3VSD_3ST_0cyc
397       : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, FalkorUnitST,
398                        FalkorUnitVSD, FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
401 } // NumMicroOps = 6
402
403 //===----------------------------------------------------------------------===//
404 // Define 8 micro-op types
405 let NumMicroOps = 8 in { // shared by both defs in this group
406   def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc
407       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY,
408                        FalkorUnitLD, FalkorUnitLD, FalkorUnitVXVY, FalkorUnitVXVY]>
409     { let Latency = 4; }
413
414   def FalkorWr_4VSD_4ST_0cyc
415       : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, FalkorUnitST, FalkorUnitVSD,
416                        FalkorUnitST, FalkorUnitVSD, FalkorUnitST, FalkorUnitVSD]>
417     { let Latency = 0; }
420 } // NumMicroOps = 8
421
422 //===----------------------------------------------------------------------===//
423 // Define 9 micro-op types
424 let NumMicroOps = 9, Latency = 4 in { // both defs differ only in where XYZ sits in the list
425   def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc
426       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
427                        FalkorUnitVXVY, FalkorUnitVXVY,
428                        FalkorUnitLD, FalkorUnitLD,
429                        FalkorUnitXYZ,
430                        FalkorUnitVXVY, FalkorUnitVXVY]>;
433
434   def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc
435       : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
436                        FalkorUnitVXVY, FalkorUnitVXVY,
437                        FalkorUnitXYZ,
438                        FalkorUnitLD, FalkorUnitLD,
439                        FalkorUnitVXVY, FalkorUnitVXVY]>;
441 } // NumMicroOps = 9, Latency = 4
442
443 //===----------------------------------------------------------------------===//
444 // Define 10 micro-op types
445 // Ten micro-ops: two VXVY, four ST and four VSD entries, zero latency.
446 let Latency = 0, NumMicroOps = 10 in
447 def FalkorWr_2VXVY_4ST_4VSD_0cyc
448     : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
449                      FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
450                      FalkorUnitST, FalkorUnitVSD,
451                      FalkorUnitST, FalkorUnitVSD]>;
454
455 //===----------------------------------------------------------------------===//
456 // Define 12 micro-op types
457 // Twelve micro-ops: four (VXVY, ST, VSD) triples, zero latency.
458 let Latency = 0, NumMicroOps = 12 in
459 def FalkorWr_4VXVY_4ST_4VSD_0cyc
460     : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
461                      FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
462                      FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
463                      FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD]>;
467 // Each SchedReadAdvance below pairs a cycle count with the FalkorWr_* write types listed in its second argument.
468 // Forwarding logic is modeled for multiply add/accumulate.
469 // -----------------------------------------------------------------------------
470 def FalkorReadIMA32  : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
471 def FalkorReadIMA64  : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
472 def FalkorReadVMA    : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
473 def FalkorReadFMA32  : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
474 def FalkorReadFMA64  : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
475 // The predicates below are C++ fragments evaluated on MI; the SchedWriteVariants that follow use them to pick a SchedVar.
476 // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
477 // -----------------------------------------------------------------------------
478 def FalkorImmZPred    : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; // operand 1 is the immediate 0
479 def FalkorFMOVZrReg   : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
480                                          MI->getOperand(1).getReg() == AArch64::XZR}]>; // operand 1 is WZR or XZR
481 def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; // decision is delegated to TII
482 // Each variant lists a predicated SchedVar first and a NoSchedPred fallback second.
483 def FalkorWr_FMOV  : SchedWriteVariant<[
484                        SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>,
485                        SchedVar<NoSchedPred,     [FalkorWr_1GTOV_1cyc]>]>;
486 // Immediate-zero case uses the zero-latency, no-pipe type.
487 def FalkorWr_MOVZ  : SchedWriteVariant<[
488                        SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
489                        SchedVar<NoSchedPred,    [FalkorWr_1XYZB_1cyc]>]>;
490 // In every variant from here on, the NoSchedPred fallback is a type with one more XYZ micro-op than the FalkorShiftExtFastPred case.
491 def FalkorWr_ADDSUBsx : SchedWriteVariant<[
492                           SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
493                           SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>]>;
494
495 def FalkorWr_LDRro : SchedWriteVariant<[
496                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
497                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>]>;
498
499 def FalkorWr_LDRSro : SchedWriteVariant<[
500                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
501                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>]>;
502
503 def FalkorWr_PRFMro : SchedWriteVariant<[
504                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
505                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>]>;
506
507 def FalkorWr_STRVro : SchedWriteVariant<[
508                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
509                         SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
510
511 def FalkorWr_STRQro : SchedWriteVariant<[
512                         SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
513                         SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
514
515 def FalkorWr_STRro : SchedWriteVariant<[
516                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
517                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
518
519 //===----------------------------------------------------------------------===//
520 // Specialize the coarse model by associating instruction groups with the
521 // subtarget-defined types. As the model is refined, this will override most
522 // of the earlier mappings.
523
524 // Miscellaneous
525 // -----------------------------------------------------------------------------
526 // COPY is mapped to the single XYZ micro-op, 1-cycle write type.
527 // FIXME: This could be better modeled by looking at the regclasses of the operands.
528 def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
529
530 // SIMD Floating-point Instructions
531 // -----------------------------------------------------------------------------
532 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)v2f32$")>;
533 // One VXVY micro-op, 2-cycle latency.
534 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
535 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FAC(GE|GT)(32|64)$")>;
536 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
537 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
538 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
539 // One VXVY micro-op, 3-cycle latency.
540 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
541 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FABD|FADD|FSUB)v2f32$")>;
542 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
543 // One VXVY micro-op, 4-cycle latency.
544 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
545 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTXNv1i64)>;
546 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
547 // These use the FMUL32 write type, which FalkorReadFMA32 lists for forwarding.
548 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
549                                       (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
550 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
551                                       (instrs FMULX32)>;
552 // These use the FMUL64 write type, which FalkorReadFMA64 lists for forwarding.
553 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
554                                       (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
555 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
556                                       (instrs FMULX64)>;
557 // From here on the FP patterns cover the v2f64/v4f32 (and related) forms, mostly with two-VXVY-micro-op types.
558 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
559
560 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
561 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
562 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs FCVTLv4i16, FCVTLv2i32)>;
563 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
564 // v2f32 divide/sqrt: one VX plus one VY micro-op, 10-cycle latency.
565 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>;
566
567 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
568
569 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
570 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs FCVTLv8i16, FCVTLv4i32)>;
571 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
572
573 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
574                                       (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
575
576 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
577                                       (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
578 // Three-micro-op conversion forms.
579 def : InstRW<[FalkorWr_3VXVY_4cyc],   (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
580 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
581 // NOTE(review): 2-cycle write type here vs. the 10-cycle type used for the v2f32 forms above -- confirm.
582 def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>;
583 // Multiply-accumulate forms: each lists a FalkorRead* advance after its write type.
584 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
585                                       (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
586 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
587                                       (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
588
589 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
590                                       (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
591 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
592                                       (instregex "^FML(A|S)v1i64_indexed$")>;
593 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
594                                       (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
595 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
596                                       (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
597
598 // SIMD Integer Instructions
599 // -----------------------------------------------------------------------------
600 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
601 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs ADDPv2i64p)>;
602 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
603 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
604 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
605 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
606 // NOTE(review): two patterns in this group both match CM(EQ|GE|HS|GT|HI) over the same type list with the same write type -- confirm whether one is redundant.
607 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
608 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHLv1i64$")>;
609 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
610 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHRd$")>;
611 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
612 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
613 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
614 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
615 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs PMULv8i8)>;
616 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
617 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHLd$")>;
618 // One VXVY micro-op, 3-cycle latency.
619 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
620 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
621 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
622 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i16v$")>;
623 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
624 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
625 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
626 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
627 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
628 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
629 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
630 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHRd$")>;
631 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
632 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
633 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
634 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs ADDVv4i16v)>;
635 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
636 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
637 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
638 // One VXVY micro-op, 4-cycle latency; the VMUL32 entries use the write type that FalkorReadVMA lists.
639 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)ADDLVv8i8v$")>;
640 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
641 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs ADDVv8i8v)>;
642 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
643                                       (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
644 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
645                                       (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
646 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
647                                       (instregex "^SQDMULL(i16|i32)$")>;
648 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
649                                       (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
650
651 def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
652 // Two-VXVY-micro-op types start here.
653 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs ADDVv4i32v)>;
654
655 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs ADDVv8i16v)>;
656 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(ADD|SUB)HNv.*$")>;
657 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
658
659 def : InstRW<[FalkorWr_2VXVY_5cyc],   (instrs ADDVv16i8v)>;
660 // NOTE(review): `_shift?$` below makes only the final 't' optional; `_shift$` may have been intended -- confirm.
661 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
662 def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^R(ADD|SUB)HNv.*$")>;
663
// Scheduling overrides that all use two VXVY micro-ops, grouped below by
// latency: 1 cycle, then 2 cycles, then 3 cycles (the latency is the trailing
// `<N>cyc` field of the write name, per the convention at the top of the file).
// Most patterns enumerate the v16i8/v8i16/v4i32/v2i64 arrangements explicitly.

// Two VXVY micro-ops, 1-cycle latency.
664 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
665 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs ADDPv2i64)>; // sz==11
666 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
667 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
668 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
669
// Two VXVY micro-ops, 2-cycle latency. ADDP for the remaining arrangements
// (marked sz!=11) lands here, unlike ADDPv2i64 above.
670 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)ADDLv.*$")>;
671 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
672 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
673 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
674 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SUBLv.*$")>;
675 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
676 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
677 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
678 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
679 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
680 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^PMULL(v8i8|v16i8)$")>;
681 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
682 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
683
// Two VXVY micro-ops, 3-cycle latency. PMULL on v1i64/v2i64 is listed here,
// whereas the v8i8/v16i8 forms are in the 2-cycle group above.
684 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
685 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
686 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABDLv.*$")>;
687 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
688 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
689 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
690 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
691 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
692 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
693 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^PMULL(v1i64|v2i64)$")>;
694 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
695 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
696
// Scheduling overrides for the multiply-family vector opcodes on the
// v16i8/v8i16/v4i32 arrangements, followed by the entries that need three or
// four VXVY micro-ops. The micro-op count and latency are encoded in each
// write name (e.g. FalkorWr_4VXVY_3cyc = 4 VXVY micro-ops, 3 cycles).
// Entries that also list FalkorReadVMA attach that read resource as well.
// NOTE(review): FalkorReadVMA presumably models accumulator forwarding -
// confirm against its definition elsewhere in this file.
697 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
698                                       (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
699 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
700                                       (instregex "^SQDMULLv.*$")>;
701 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
702                                       (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
703
// (S|U)ADDLV: three VXVY micro-ops; latency rises with element count
// (v4i32: 3 cycles, v8i16: 5 cycles, v16i8: 6 cycles).
704 def : InstRW<[FalkorWr_3VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i32v$")>;
705
706 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(S|U)ADDLVv8i16v$")>;
707
708 def : InstRW<[FalkorWr_3VXVY_6cyc],   (instregex "^(S|U)ADDLVv16i8v$")>;
709
// Four VXVY micro-ops.
710 def : InstRW<[FalkorWr_4VXVY_2cyc],   (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
711
712 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instregex "^(S|U)ABALv.*$")>;
713
714 def : InstRW<[FalkorWr_4VXVY_4cyc],   (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
715
// SQDMLAL/SQDMLSL: the first pattern covers the i16/i32 and v1*_indexed
// names with one micro-op; the second uses the character class `v[248]` so it
// only matches names whose arrangement starts with 2, 4 or 8 and therefore
// does not overlap the v1* names above.
716 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
717                                       (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
718 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
719                                       (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
720
721 // SIMD Load Instructions
722 // -----------------------------------------------------------------------------
// Scheduling overrides for the LD1/LD2/LD3/LD4 family (including the `R` and
// lane `i<N>` forms). Entries come in pairs: the plain opcode gets a single
// write, and the matching `_POST` opcode gets the same write preceded by
// FalkorWr_none_1cyc (defined earlier in this file with zero micro-ops and a
// latency of 1).
// NOTE(review): the leading FalkorWr_none_1cyc presumably describes the
// post-increment base-register result - confirm against the operand order of
// the _POST instruction definitions.
// Write names follow the file's convention: micro-op count/type designators
// followed by the latency, e.g. FalkorWr_1LD_2VXVY_4cyc.

// One LD micro-op, 3 cycles.
723 def : InstRW<[FalkorWr_1LD_3cyc],                           (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
724 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],       (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
725 def : InstRW<[FalkorWr_1LD_3cyc],                           (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
726 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],       (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
727 def : InstRW<[FalkorWr_1LD_3cyc],                           (instrs LD2i64)>;
728 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],       (instrs LD2i64_POST)>;
729
// Lane loads of 8/16/32-bit elements add VXVY micro-ops (one per register
// number in the opcode name: LD1i -> 1, LD2i -> 2, LD3i -> 3, LD4i -> 4) and
// take 4 cycles; the LD2i/LD3i/LD4i entries appear further below.
730 def : InstRW<[FalkorWr_1LD_1VXVY_4cyc],                     (instregex "^LD1i(8|16|32)$")>;
731 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>;
732
// 8b/4h/2s/1d arrangements of the two-register forms.
733 def : InstRW<[FalkorWr_1LD_1none_3cyc],                     (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
734 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
735 def : InstRW<[FalkorWr_1LD_1none_3cyc],                     (instregex "^LD2Twov(8b|4h|2s|1d)$")>;
736 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>;
737 def : InstRW<[FalkorWr_1LD_1none_3cyc],                     (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
738 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
739
// 16b/8h/4s/2d arrangements of the two-register forms, plus LD3i64/LD4i64:
// two LD micro-ops.
740 def : InstRW<[FalkorWr_2LD_3cyc],                           (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
741 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc],       (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
742 def : InstRW<[FalkorWr_2LD_3cyc],                           (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
743 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc],       (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
744 def : InstRW<[FalkorWr_2LD_3cyc],                           (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
745 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc],       (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
746 def : InstRW<[FalkorWr_2LD_3cyc],                           (instrs LD3i64)>;
747 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc],       (instrs LD3i64_POST)>;
748 def : InstRW<[FalkorWr_2LD_3cyc],                           (instrs LD4i64)>;
749 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc],       (instrs LD4i64_POST)>;
750
751 def : InstRW<[FalkorWr_1LD_2VXVY_4cyc],                     (instregex "^LD2i(8|16|32)$")>;
752 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>;
753
// Three-register forms.
754 def : InstRW<[FalkorWr_2LD_1none_3cyc],                     (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
755 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
756 def : InstRW<[FalkorWr_2LD_1none_3cyc],                     (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
757 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
758
// LD3Threev2d is listed explicitly here with three LD micro-ops; the other
// 16b/8h/4s LD3Threev arrangements are handled at the end of this section.
759 def : InstRW<[FalkorWr_3LD_3cyc],                           (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
760 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc],       (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
761 def : InstRW<[FalkorWr_3LD_3cyc],                           (instrs LD3Threev2d)>;
762 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc],       (instrs LD3Threev2d_POST)>;
763 def : InstRW<[FalkorWr_3LD_3cyc],                           (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
764 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc],       (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
765
766 def : InstRW<[FalkorWr_1LD_3VXVY_4cyc],                     (instregex "^LD3i(8|16|32)$")>;
767 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>;
768
// Four-register forms.
769 def : InstRW<[FalkorWr_2LD_2none_3cyc],                     (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
770 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
771 def : InstRW<[FalkorWr_2LD_2none_3cyc],                     (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
772 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
773
// LD4Fourv2d is listed explicitly here with four LD micro-ops; the other
// 16b/8h/4s LD4Fourv arrangements are handled at the end of this section.
774 def : InstRW<[FalkorWr_4LD_3cyc],                           (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
775 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc],       (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
776 def : InstRW<[FalkorWr_4LD_3cyc],                           (instrs LD4Fourv2d)>;
777 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc],       (instrs LD4Fourv2d_POST)>;
778 def : InstRW<[FalkorWr_4LD_3cyc],                           (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
779 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc],       (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
780
781 def : InstRW<[FalkorWr_1LD_4VXVY_4cyc],                     (instregex "^LD4i(8|16|32)$")>;
782 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>;
783
// LD3Threev / LD4Fourv on 8b/4h/2s/1d: LD plus VXVY micro-ops, 4 cycles.
784 def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc],               (instregex "^LD3Threev(8b|4h|2s|1d)$")>;
785 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc],
786                                                             (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>;
787
788 def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc],               (instregex "^LD4Fourv(8b|4h|2s|1d)$")>;
789 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc],
790                                                             (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>;
791
// LD3Threev / LD4Fourv on 16b/8h/4s. Unlike the other pairs in this section,
// the _POST forms here use a different write from the plain forms: one whose
// name additionally contains a 1XYZ micro-op.
792 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc],           (instregex "^LD3Threev(16b|8h|4s)$")>;
793
794 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc],           (instregex "^LD4Fourv(16b|8h|4s)$")>;
795
796 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc],
797                                                             (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
798
799 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc],
800                                                             (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
801
802 // Arithmetic and Logical Instructions
803 // -----------------------------------------------------------------------------
// Scheduling overrides for the scalar integer ALU opcodes. Every pattern
// selects both the W and X register widths via `(W|X)`; `(S)?` additionally
// covers the S-suffixed opcode names. All entries use FalkorWr_1XYZ_1cyc
// (one XYZ micro-op, 1-cycle latency per the file's naming convention), except
// the ADD/SUB forms whose names end in rs, rx or rx64, which use
// FalkorWr_ADDSUBsx.
// NOTE(review): FalkorWr_ADDSUBsx is presumably one of the WriteVariant types
// mentioned in the file header - confirm against its definition.
804 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
805 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADC(S)?(W|X)r$")>;
806 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADD(S)?(W|X)r(r|i)$")>;
807 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
808 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
809 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^BIC(S)?(W|X)r(r|s)$")>;
810 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EON(W|X)r(r|s)$")>;
811 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EOR(W|X)r(i|r|s)$")>;
812 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORN(W|X)r(r|s)$")>;
813 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORR(W|X)r(i|r|s)$")>;
814 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SBC(S)?(W|X)r$")>;
815 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SUB(S)?(W|X)r(r|i)$")>;
816 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
817 def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
818
819 // SIMD Miscellaneous Instructions
820 // -----------------------------------------------------------------------------
// Scheduling overrides for SIMD moves, permutes, table lookups and estimate
// opcodes. Write names encode micro-op count/type and latency in cycles, per
// the convention at the top of this file. The section is ordered roughly by
// micro-op count, with one-micro-op entries first.
// NOTE(review): GTOV and VTOG are not among the pipe designators listed in
// the file header; presumably general-to-vector and vector-to-general
// transfers - confirm against the write definitions.
// NOTE(review): the two `(MOVI|MVNI)...` patterns below have no leading `^`,
// unlike the other patterns in this section; whether that changes what they
// match depends on how instregex anchors patterns - confirm.
821 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
822 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
823 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^CPY(i8|i16|i32|i64)$")>;
824 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^INSv(i8|i16)(gpr|lane)$")>;
825 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^(S|U)MOVv.*$")>;
826 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v8i8$")>;
827 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs EXTv8i8)>;
828 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
829 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
830 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs NOTv8i8)>;
831 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^REV(16|32|64)v.*$")>;
832 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
833
834 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
835
836 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "(S|U)QXTU?Nv.*$")>;
837 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
838 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPXv1i32, FRECPXv1i64)>;
839 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs URECPEv2i32, URSQRTEv2i32)>;
840
// FRECPS/FRSQRTS use the FMUL32 (5-cycle) or FMUL64 (6-cycle) writes.
841 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
842                                       (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
843
844 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
845                                       (instrs FRECPS64, FRSQRTS64)>;
846
// Entries with two micro-ops; these mostly mirror the one-micro-op entries
// above for the v16i8/v8i16/v4i32/v2i64 arrangements.
847 def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
848                                       (instregex "^INSv(i32|i64)(gpr|lane)$")>;
849 def : InstRW<[FalkorWr_2GTOV_1cyc],   (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
850 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL)v16i8$")>;
851 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs EXTv16i8)>;
852 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
853 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs NOTv16i8)>;
854 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs TBLv16i8One)>;
855
856 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
857 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
858 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs URECPEv4i32, URSQRTEv4i32)>;
859
860 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs TBLv8i8Two)>;
861 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^TBX(v8|v16)i8One$")>;
862
863 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
864                                       (instrs FRECPSv4f32, FRSQRTSv4f32)>;
865
866 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
867                                       (instrs FRECPSv2f64, FRSQRTSv2f64)>;
868
// Remaining TBL/TBX table-lookup forms: micro-op count and latency both
// increase with the number of table registers in the opcode name.
869 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBL(v8i8Three|v16i8Two)$")>;
870 def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBX(v8i8Two|v16i8Two)$")>;
871
872 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBL(v8i8Four|v16i8Three)$")>;
873 def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBX(v8i8Three|v16i8Three)$")>;
874
875 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instrs TBLv16i8Four)>;
876 def : InstRW<[FalkorWr_5VXVY_7cyc],   (instregex "^TBX(v8i8Four|v16i8Four)$")>;
877
878 // SIMD Store Instructions
879 // -----------------------------------------------------------------------------
// Scheduling overrides for FP/SIMD register stores (STR/STP/STUR/STNP on the
// B/H/S/D/Q register names) and for the ST1..ST4 structure stores.
// All fixed store writes here end in `_0cyc`, i.e. a latency field of zero.
// Writeback forms carry an extra leading write. The scalar post/pre forms
// and the first ST1 `_POST` pattern use FalkorWr_none_1cyc (zero micro-ops);
// the remaining ST1..ST4 `_POST` patterns use FalkorWr_1XYZ_1cyc, which the
// FIXME comments below describe as overly conservative for the immediate
// post-index case.
// NOTE(review): FalkorWr_STRVro and FalkorWr_STRQro (register-offset forms)
// are presumably WriteVariant types - confirm against their definitions.
880
881 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>;
882 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
883                                        (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
884 def : InstRW<[FalkorWr_STRVro],        (instregex "^STR(D|S|H|B)ro(W|X)$")>;
885 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>;
886 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc],
887                                        (instregex "^STPQ(post|pre)$")>;
888 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>;
889 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
890                                        (instregex "^STP(D|S)(post|pre)$")>;
891 def : InstRW<[FalkorWr_STRQro],        (instregex "^STRQro(W|X)$")>;
892 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>;
893 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>;
894 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>;
895
// Structure stores that need one VSD and one ST micro-op.
896 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
897 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc],
898                                        (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
899 def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
900 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
901                                        (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
902 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc],
903                                        (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>;
904
// Structure stores that need two VSD and two ST micro-ops.
905 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
906 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
907 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>;
908 def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>;
909 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
910 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
911                                        (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
912 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
913 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
914                                        (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
915 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
916 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
917                                        (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
918 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
919 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc],
920                                        (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
921
// ST3Three on 8b/4h/2s/1d: adds one VXVY micro-op to the store micro-ops.
922 def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc],
923                                        (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>;
924 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
925 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc],
926                                        (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>;
927
// Three VSD / three ST micro-ops; ST3Threev2d is listed explicitly here,
// separate from the other ST3Three arrangements further below.
928 def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
929 def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>;
930 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
931 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
932                                        (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
933 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
934 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc],
935                                        (instrs ST3Threev2d_POST)>;
936
// ST4Four on 8b/4h/2s/1d: adds two VXVY micro-ops to the store micro-ops.
937 def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc],
938                                        (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>;
939 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
940 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc],
941                                        (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>;
942
// Four VSD / four ST micro-ops; ST4Fourv2d is listed explicitly here,
// separate from the other ST4Four arrangements further below.
943 def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
944 def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>;
945 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
946 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
947                                        (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
948 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
949 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc],
950                                        (instrs ST4Fourv2d_POST)>;
951
// ST3Three / ST4Four on 16b/8h/4s: VXVY micro-ops plus four ST and four VSD
// micro-ops.
952 def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc],
953                                        (instregex "^ST3Three(v16b|v8h|v4s)$")>;
954 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
955 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc],
956                                        (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
957
958 def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc],
959                                        (instregex "^ST4Four(v16b|v8h|v4s)$")>;
960 // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
961 def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc],
962                                        (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
963
964 // Branch Instructions
965 // -----------------------------------------------------------------------------
// Scheduling overrides for branches, calls, returns and the TCRETURN pseudo
// opcodes. Every write here has a latency field of 0 cycles; the entries
// differ only in the pipe designators encoded in the write name (none, Z, ZB,
// XYZB, or Z plus XY).
966 def : InstRW<[FalkorWr_1none_0cyc],   (instrs B, TCRETURNdi)>;
967 def : InstRW<[FalkorWr_1Z_0cyc],      (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
968 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs RET_ReallyLR, TCRETURNri)>;
969 def : InstRW<[FalkorWr_1ZB_0cyc],     (instrs Bcc)>;
970 def : InstRW<[FalkorWr_1XYZB_0cyc],   (instrs BL)>;
971 def : InstRW<[FalkorWr_1Z_1XY_0cyc],  (instrs BLR)>;
972
973 // Cryptography Extensions
974 // -----------------------------------------------------------------------------
// Scheduling overrides for the AES, SHA1 and SHA256 opcodes. Most use VXVY
// writes; the SHA1 C/M/P and SHA256H/H2 entries instead use writes named
// 1VX_1VY, i.e. one micro-op on each of the VX and VY pipes per the file's
// naming convention.
975 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs SHA1Hrr)>;
976 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs AESIMCrr, AESMCrr)>;
977 def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs AESDrr, AESErr)>;
978 def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
979 def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
980 def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
981 def : InstRW<[FalkorWr_4VXVY_3cyc],   (instrs SHA256SU1rrr)>;
982
983 // FP Load Instructions
984 // -----------------------------------------------------------------------------
// Scheduling overrides for loads into the B/H/S/D/Q register names.
// Single-register loads use one LD micro-op with 3-cycle latency. The pair
// loads list two writes, the second being FalkorWr_none_3cyc (zero
// micro-ops, latency 3, as defined earlier in this file). The pre/post forms
// additionally list a leading FalkorWr_none_1cyc.
// NOTE(review): the write list presumably maps onto the instruction's
// results in order (writeback base first for pre/post) - confirm.
// NOTE(review): FalkorWr_LDRro is presumably a WriteVariant for the
// register-offset forms - confirm against its definition.
// NOTE(review): the three `LDNP(D|S)i$` / `LDP(D|S)...` patterns lack the
// leading `^` used elsewhere in this section; whether that matters depends on
// how instregex anchors patterns - confirm.
985 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
986 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
987                                       (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
988 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDUR(Q|D|S|H|B)i$")>;
989 def : InstRW<[FalkorWr_LDRro],        (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
990 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
991                                       (instrs LDNPQi)>;
992 def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
993                                       (instrs LDPQi)>;
994 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
995                                       (instregex "LDNP(D|S)i$")>;
996 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
997                                       (instregex "LDP(D|S)i$")>;
998 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc],
999                                       (instregex "LDP(D|S)(pre|post)$")>;
1000 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc],
1001                                       (instregex "^LDPQ(pre|post)$")>;
1002
1003 // FP Data Processing Instructions
1004 // -----------------------------------------------------------------------------
// Scheduling overrides for scalar floating-point opcodes on the S and D
// register names, grouped by increasing latency (1, 2, 3, 4 cycles, then the
// multiply, divide/sqrt and fused multiply-add entries).
// Single-precision multiplies use FalkorWr_FMUL32_* (5 cycles) and
// double-precision multiplies use FalkorWr_FMUL64_* (6 cycles).
1005 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCCMP(E)?(S|D)rr$")>;
1006 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
1007 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
1008 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)(S|D)r$")>;
1009 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCSEL(S|D)rrr$")>;
1010
1011 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
1012 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
1013 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs FCVTSHr, FCVTDHr)>;
1014 def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
1015
1016 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FABD(32|64)$")>;
1017 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FADD|FSUB)(S|D)rr$")>;
1018 def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FCVTHSr, FCVTHDr)>;
1019
1020 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTSDr, FCVTDSr)>;
1021
1022 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
1023                                       (instregex "^F(N)?MULSrr$")>;
1024
1025 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
1026                                       (instregex "^F(N)?MULDrr$")>;
1027
// Divide and square root use one VX and one VY micro-op. The same write is
// applied to both the S and D forms of each.
// NOTE(review): FSQRT is assigned a 2-cycle latency while FDIV gets 10;
// confirm the FSQRT value is intentional.
1028 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>;
1029 def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>;
1030
// Fused multiply-add/subtract: the write is followed by three read entries.
// NOTE(review): presumably one read per source operand in order, so that
// FalkorReadFMA32/64 applies to the third (addend) operand - confirm.
1031 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
1032                                       (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
1033 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
1034                                       (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
1035
1036 // FP Miscellaneous Instructions
1037 // -----------------------------------------------------------------------------
// Scheduling overrides for FMOV, fixed-point FCVTZ and integer-to-FP CVTF
// opcodes. The writes used are GTOV, VTOG or VXVY depending on the opcode.
// NOTE(review): GTOV/VTOG presumably denote general-to-vector and
// vector-to-general register transfers - confirm against the write
// definitions. FalkorWr_FMOV is presumably a WriteVariant - confirm.
// NOTE(review): the last two patterns ("^(S|U)CVTF(...)(_shift)?") have no
// trailing `$`, so as written they do not require the name to end there.
1038 def : InstRW<[FalkorWr_FMOV],         (instregex "^FMOV(WS|XD|XDHigh)r$")>;
1039 def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^FMOV(S|D)i$")>;
1040 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
1041 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)(d|s)$")>;
1042 def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FMOV(SW|DX|DXHigh)r$")>;
1043 def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FMOV(Sr|Dr|v.*_ns)$")>;
1044 // FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
1045 def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs FMOVD0, FMOVS0)>;
1046
1047 def : InstRW<[FalkorWr_1GTOV_4cyc],   (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
1048 def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
1049
1050 def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
1051
1052 // Load Instructions
1053 // -----------------------------------------------------------------------------
// Scheduling overrides for integer loads and prefetches.
// - PRFM/PRFUM (immediate and literal forms) use FalkorWr_1ST_0cyc, i.e. an ST
//   micro-op with zero latency; the register-offset PRFM uses FalkorWr_PRFMro.
// - Plain loads use one LD micro-op with 3-cycle latency.
// - The LDRS*/LDPSW/LDTRS*/LDURS* opcodes use the 4-cycle LD writes instead.
// - Pair loads list a second write (FalkorWr_none_3cyc / _4cyc), and the
//   pre/post forms list a leading FalkorWr_none_1cyc.
// NOTE(review): the write list presumably maps onto the instruction's results
// in order (writeback base first for pre/post) - confirm.
// NOTE(review): FalkorWr_LDRro, FalkorWr_LDRSro and FalkorWr_PRFMro are
// presumably WriteVariant types for register-offset addressing - confirm.
1054 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFMui, PRFMl)>;
1055 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFUMi)>;
1056 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
1057                                       (instregex "^LDNP(W|X)i$")>;
1058 def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
1059                                       (instregex "^LDP(W|X)i$")>;
1060 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc],
1061                                       (instregex "^LDP(W|X)(post|pre)$")>;
1062 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDR(BB|HH|W|X)ui$")>;
1063 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc],
1064                                       (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
1065 def : InstRW<[FalkorWr_LDRro],        (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
1066 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDR(W|X)l$")>;
1067 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDTR(B|H|W|X)i$")>;
1068 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^LDUR(BB|HH|W|X)i$")>;
1069 def : InstRW<[FalkorWr_PRFMro],       (instregex "^PRFMro(W|X)$")>;
1070 def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
1071                                       (instrs LDPSWi)>;
1072 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc],
1073                                       (instregex "^LDPSW(post|pre)$")>;
1074 def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
1075 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc],
1076                                       (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
1077 def : InstRW<[FalkorWr_LDRSro],       (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
1078 def : InstRW<[FalkorWr_1LD_4cyc],     (instrs LDRSWl)>;
1079 def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
1080 def : InstRW<[FalkorWr_1LD_4cyc],     (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
1081
1082 // Miscellaneous Data-Processing Instructions
1083 // -----------------------------------------------------------------------------
// Scheduling overrides for bitfield moves, CRC32, bit/byte-reversal and count
// opcodes, and EXTR. Per the file's naming convention the writes encode:
// BFM forms - one XYZ micro-op, 1 cycle; CRC32* - one X micro-op, 2 cycles;
// CLS/CLZ/RBIT/REV* - one XYZ micro-op, 2 cycles; EXTR - two XYZ micro-ops,
// 2 cycles.
1084 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(S|U)?BFM(W|X)ri$")>;
1085 def : InstRW<[FalkorWr_1X_2cyc],      (instregex "^CRC32.*$")>;
1086 def : InstRW<[FalkorWr_1XYZ_2cyc],    (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
1087 def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^EXTR(W|X)rri$")>;
1088
1089 // Divide and Multiply Instructions
1090 // -----------------------------------------------------------------------------
// Scheduling overrides for scalar integer multiply-accumulate, multiply-high
// and divide opcodes, plus the vector (S|U)MULL / (S|U)MLAL / (S|U)MLSL
// opcodes at the end.
// The multiply-accumulate entries list a write followed by three reads
// (ReadDefault, ReadDefault, FalkorReadIMA32/64).
// NOTE(review): presumably one read per source operand in order, so that the
// FalkorReadIMA* read applies to the third (accumulator) operand - confirm.
// Divides use one X and one Z micro-op; the X-register form has twice the
// latency of the W-register form (16 vs 8 cycles).
1091 def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1092                                         (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1093 def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
1094                                         (instregex "^M(ADD|SUB)Wrrr$")>;
1095
1096 def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
1097 def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1098                                         (instregex "^M(ADD|SUB)Xrrr$")>;
1099
1100 def : InstRW<[FalkorWr_1X_1Z_8cyc],     (instregex "^(S|U)DIVWr$")>;
1101 def : InstRW<[FalkorWr_1X_1Z_16cyc],    (instregex "^(S|U)DIVXr$")>;
1102
// Vector widening multiplies; the accumulate forms also attach FalkorReadVMA.
1103 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
1104                                         (instregex "^(S|U)MULLv.*$")>;
1105 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
1106                                         (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
1107
1108 // Move and Shift Instructions
1109 // -----------------------------------------------------------------------------
// Scheduling overrides for variable shifts, MOVK/MOVN/MOVZ, ADR/ADRP and the
// MOV*/LOADgot pseudo opcodes.
// The first pattern ends in `.*` with no `$`, so it accepts any suffix after
// the (W|X) register-width letter.
// The MOVaddr* and LOADgot pseudos use WriteSequence<[...]> to combine two
// writes into one.
// NOTE(review): presumably WriteSequence models the listed writes issuing
// back to back with their latencies summed - confirm in TargetSchedule.td.
// NOTE(review): FalkorWr_MOVZ is presumably the WriteVariant for the
// immediate-zero case mentioned in the file header - confirm.
1110 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
1111 def : InstRW<[FalkorWr_1XYZB_1cyc],   (instregex "^ADRP?$")>;
1112 def : InstRW<[FalkorWr_1XYZB_1cyc],   (instregex "^MOVN(W|X)i$")>;
1113 def : InstRW<[FalkorWr_MOVZ],         (instregex "^MOVZ(W|X)i$")>;
1114 def : InstRW<[FalkorWr_1XYZ_1cyc],    (instrs MOVi32imm, MOVi64imm)>;
1115 def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
1116                                       (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
1117 def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
1118                                       (instrs LOADgot)>;
1119
1120 // Other Instructions
1121 // -----------------------------------------------------------------------------
// Scheduling overrides for barriers, exception-generating and system opcodes,
// system-register moves, and the exclusive / acquire / release memory
// opcodes. The pipes and latency are encoded in each write name per the
// file's naming convention (e.g. FalkorWr_2LD_1ST_1SD_3cyc = two LD, one ST
// and one SD micro-op with a 3-cycle latency).
// STNPWi/STNPXi are scheduled here rather than in the Store Instructions
// section below.
1122 def : InstRW<[FalkorWr_1LD_0cyc],     (instrs CLREX, DMB, DSB)>;
1123 def : InstRW<[FalkorWr_1none_0cyc],   (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
1124 def : InstRW<[FalkorWr_1ST_0cyc],     (instrs SYSxt, SYSLxt)>;
1125 def : InstRW<[FalkorWr_1Z_0cyc],      (instrs MSRpstateImm1, MSRpstateImm4)>;
1126
// Load-acquire and load-exclusive opcodes, plus MRS and MOVbaseTLS: one LD
// micro-op, 3 cycles.
1127 def : InstRW<[FalkorWr_1LD_3cyc],     (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>;
1128 def : InstRW<[FalkorWr_1LD_3cyc],     (instrs MRS, MOVbaseTLS)>;
1129
1130 def : InstRW<[FalkorWr_1LD_1Z_3cyc],  (instrs DRPS)>;
1131
1132 def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
1133 def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>;
1134 def : InstRW<[FalkorWr_2LD_1Z_3cyc],  (instrs ERET)>;
1135
// "^LDC.*$" matches every opcode whose name starts with LDC. The
// store-release and store-exclusive opcodes below use writes that include an
// LD micro-op alongside the ST and SD micro-ops.
1136 def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>;
1137 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>;
1138 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>;
1139 def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>;
1140
1141 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>;
1142 def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>;
1143
1144 // Store Instructions
1145 // -----------------------------------------------------------------------------
// Scheduling overrides for integer stores (STP/STR/STTR/STUR on the
// BB/HH/W/X opcode names). The fixed entries use FalkorWr_1SD_1ST_0cyc: one
// SD and one ST micro-op with a latency field of zero, per the file's naming
// convention. The post/pre forms additionally list a leading
// FalkorWr_none_1cyc (zero micro-ops, latency 1, as defined earlier in this
// file).
// NOTE(review): the leading write presumably describes the updated base
// register - confirm against the operand order of the pre/post definitions.
// NOTE(review): FalkorWr_STRro is presumably a WriteVariant for the
// register-offset forms - confirm against its definition.
1146 def : InstRW<[FalkorWr_1SD_1ST_0cyc],     (instregex "^STP(W|X)i$")>;
1147 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
1148                                           (instregex "^STP(W|X)(post|pre)$")>;
1149 def : InstRW<[FalkorWr_1SD_1ST_0cyc],     (instregex "^STR(BB|HH|W|X)ui$")>;
1150 def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc],
1151                                           (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
1152 def : InstRW<[FalkorWr_STRro],            (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
1153 def : InstRW<[FalkorWr_1SD_1ST_0cyc],     (instregex "^STTR(B|H|W|X)i$")>;
1154 def : InstRW<[FalkorWr_1SD_1ST_0cyc],     (instregex "^STUR(BB|HH|W|X)i$")>;
1155