1 //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the uop and latency details for the machine model for the
11 // Qualcomm Falkor subtarget.
13 //===----------------------------------------------------------------------===//
15 // Contains all of the Falkor specific SchedWriteRes types. The approach
16 // below is to define a generic SchedWriteRes for every combination of
17 // latency and microOps. The naming convention is to use a prefix, one field
18 // for latency, and one or more microOp count/type designators.
20 // MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
23 // e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
24 // down one Z pipe, six SD pipes, four VX pipes and the total latency is
27 // Contains all of the Falkor specific ReadAdvance types for forwarding logic.
29 // Contains all of the Falkor specific WriteVariant types for immediate zero
31 //===----------------------------------------------------------------------===//
33 //===----------------------------------------------------------------------===//
34 // Define 0 micro-op types
35 def FalkorWr_LdStInc_none_3cyc : SchedWriteRes<[]> {
39 def FalkorWr_none_3cyc : SchedWriteRes<[]> {
43 def FalkorWr_none_4cyc : SchedWriteRes<[]> {
48 //===----------------------------------------------------------------------===//
49 // Define 1 micro-op types
// Single micro-op write types. Each record lists one Falkor processor
// resource (or none) in its SchedWriteRes and sets only the Latency field.
51 def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; }
// NOTE(review): the name says "2cyc" but Latency is 4. FalkorReadIMA32
// (defined further down in this file) advances reads of this write by 3
// cycles -- confirm the latency value is intentional.
52 def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
53 def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
54 def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
55 def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
56 def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
57 def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
58 def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
59 def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
60 def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
61 def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
62 def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
63 def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
// Empty resource list: this write occupies no processor resource.
64 def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
// Vector writes on the combined VXVY resource, latencies 0 through 6.
// The VMUL32/FMUL32/FMUL64 variants duplicate a plain latency class under a
// distinct name so the SchedReadAdvance records later in this file can
// reference them specifically.
66 def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
67 def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
68 def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
69 def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
70 def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
71 def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
72 def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
73 def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
74 def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
75 def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
// Load (LD) and store (ST) resource writes.
77 def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
78 def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
79 def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
// Writes on the GTOV and VTOG resources (presumably general<->vector
// register transfers, judging by the unit names -- confirm).
81 def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
82 def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
83 def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
84 def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
86 //===----------------------------------------------------------------------===//
87 // Define 2 micro-op types
89 def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
93 def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
97 def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
101 def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
105 def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
109 def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
113 def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
117 def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
121 def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
125 def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
130 def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
134 def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
138 def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
143 def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
148 def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
153 def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
158 def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
163 def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
168 def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
173 def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
178 def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
183 def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
188 def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
192 def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
197 def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
202 def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
207 def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
210 let ResourceCycles = [2, 8];
213 def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
216 let ResourceCycles = [2, 11];
219 def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
224 def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
229 def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
234 def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
239 //===----------------------------------------------------------------------===//
240 // Define 3 micro-op types
242 def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
248 def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
254 def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
259 def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
264 def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
269 def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
274 def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
279 def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
284 def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
290 def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
296 def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
300 def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
304 //===----------------------------------------------------------------------===//
305 // Define 4 micro-op types
307 def FalkorWr_2VX_2VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
308 FalkorUnitVX, FalkorUnitVY]> {
313 def FalkorWr_2VX_2VY_20cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
314 FalkorUnitVX, FalkorUnitVY]> {
319 def FalkorWr_2VX_2VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
320 FalkorUnitVX, FalkorUnitVY]> {
325 def FalkorWr_2VX_2VY_24cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
326 FalkorUnitVX, FalkorUnitVY]> {
331 def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
332 FalkorUnitVXVY, FalkorUnitVXVY]> {
336 def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
337 FalkorUnitVXVY, FalkorUnitVXVY]> {
341 def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
342 FalkorUnitVXVY, FalkorUnitVXVY]> {
346 def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
347 FalkorUnitVXVY, FalkorUnitVXVY]> {
352 def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
353 FalkorUnitLD, FalkorUnitLD]> {
358 def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
359 FalkorUnitVXVY, FalkorUnitVXVY]> {
364 def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
369 def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
370 FalkorUnitSD, FalkorUnitLD]> {
375 def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
376 FalkorUnitST, FalkorUnitVSD]> {
381 //===----------------------------------------------------------------------===//
382 // Define 5 micro-op types
384 def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
385 FalkorUnitVXVY, FalkorUnitVXVY,
390 def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
391 FalkorUnitVXVY, FalkorUnitVXVY]> {
395 def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
396 FalkorUnitVXVY, FalkorUnitVXVY,
401 def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
402 FalkorUnitVSD, FalkorUnitST,
407 def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
408 FalkorUnitVSD, FalkorUnitST,
413 //===----------------------------------------------------------------------===//
414 // Define 6 micro-op types
416 def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
417 FalkorUnitVXVY, FalkorUnitVXVY]> {
422 def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
423 FalkorUnitVSD, FalkorUnitXYZ,
424 FalkorUnitST, FalkorUnitVSD]> {
429 def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
430 FalkorUnitVSD, FalkorUnitVXVY,
431 FalkorUnitST, FalkorUnitVSD]> {
436 def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
437 FalkorUnitST, FalkorUnitVSD,
438 FalkorUnitST, FalkorUnitVSD]> {
443 //===----------------------------------------------------------------------===//
444 // Define 8 micro-op types
446 def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
447 FalkorUnitVXVY, FalkorUnitVXVY,
448 FalkorUnitLD, FalkorUnitLD,
449 FalkorUnitVXVY, FalkorUnitVXVY]> {
454 def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
455 FalkorUnitST, FalkorUnitVSD,
456 FalkorUnitST, FalkorUnitVSD,
457 FalkorUnitST, FalkorUnitVSD]> {
462 //===----------------------------------------------------------------------===//
463 // Define 9 micro-op types
465 def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
466 FalkorUnitLD, FalkorUnitVXVY,
467 FalkorUnitVXVY, FalkorUnitLD,
468 FalkorUnitLD, FalkorUnitXYZ,
469 FalkorUnitVXVY, FalkorUnitVXVY]> {
474 def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
475 FalkorUnitLD, FalkorUnitVXVY,
476 FalkorUnitVXVY, FalkorUnitXYZ,
477 FalkorUnitLD, FalkorUnitLD,
478 FalkorUnitVXVY, FalkorUnitVXVY]> {
483 //===----------------------------------------------------------------------===//
484 // Define 10 micro-op types
486 def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
487 FalkorUnitVSD, FalkorUnitVXVY,
488 FalkorUnitST, FalkorUnitVSD,
489 FalkorUnitST, FalkorUnitVSD,
490 FalkorUnitST, FalkorUnitVSD]> {
492 let NumMicroOps = 10;
495 //===----------------------------------------------------------------------===//
496 // Define 12 micro-op types
498 def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
499 FalkorUnitVSD, FalkorUnitVXVY,
500 FalkorUnitST, FalkorUnitVSD,
501 FalkorUnitVXVY, FalkorUnitST,
502 FalkorUnitVSD, FalkorUnitVXVY,
503 FalkorUnitST, FalkorUnitVSD]> {
505 let NumMicroOps = 12;
508 // Forwarding logic is modeled for multiply add/accumulate and
509 // load/store base register increment.
510 // -----------------------------------------------------------------------------
// Each SchedReadAdvance<N, [writes]> lets a read consume the result of one of
// the listed writes N cycles earlier than that write's nominal latency.
// Integer multiply-accumulate forwarding (32-bit and 64-bit multiply writes).
511 def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
512 def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
// Vector integer and floating-point multiply-accumulate forwarding; each
// record lists both the 1-uop and the 2-uop variant of the multiply write.
513 def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
514 def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
515 def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
// Forwarding of the load/store increment write (FalkorWr_LdStInc_none_3cyc):
// advanced by 2 cycles for the load read and by 1 cycle for the store read.
517 def FalkorReadIncLd : SchedReadAdvance<2, [FalkorWr_LdStInc_none_3cyc]>;
518 def FalkorReadIncSt : SchedReadAdvance<1, [FalkorWr_LdStInc_none_3cyc]>;
520 // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
521 // -----------------------------------------------------------------------------
// True when operand 1 of the instruction is an immediate equal to zero.
// The isImm() test guards the getImm() call.
522 def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() &&
523 MI->getOperand(1).getImm() == 0}]>;
// True when operand 1 of the instruction is the zero register (WZR or XZR).
// The isReg() test guards the getReg() calls so the predicate is safe on a
// non-register operand, mirroring the isImm() guard in FalkorImmZPred.
524 def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).isReg() &&
526 (MI->getOperand(1).getReg() == AArch64::WZR || MI->getOperand(1).getReg() == AArch64::XZR)}]>;
// Delegates to the target instruction info hook isFalkorShiftExtFast(); the
// hook's definition is outside this file.
527 def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
// Write variants: each SchedWriteVariant selects the first SchedVar whose
// predicate holds; NoSchedPred is the fallback case.
// Zero-register source -> no resource; otherwise one GTOV op, 1 cycle.
529 def FalkorWr_FMOV : SchedWriteVariant<[
530 SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>,
531 SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
// Zero immediate -> no resource; otherwise one XYZB op with 0-cycle latency.
533 def FalkorWr_MOVZ : SchedWriteVariant<[
534 SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
535 SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
// For the variants keyed on FalkorShiftExtFastPred, the fallback case uses a
// write with one more XYZ micro-op than the fast case.
538 def FalkorWr_ADDSUBsx : SchedWriteVariant<[
539 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
540 SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
542 def FalkorWr_LDRro : SchedWriteVariant<[
543 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
544 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
546 def FalkorWr_LDRSro : SchedWriteVariant<[
547 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
548 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
// Zero-register source -> 0-cycle XYZ op; otherwise 1-cycle XYZ op.
550 def FalkorWr_ORRi : SchedWriteVariant<[
551 SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
552 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>;
554 def FalkorWr_PRFMro : SchedWriteVariant<[
555 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
556 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
558 def FalkorWr_STRVro : SchedWriteVariant<[
559 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
560 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
// NOTE(review): unlike the other register-offset variants, the fast case
// here already includes one XYZ micro-op -- confirm this is intended.
562 def FalkorWr_STRQro : SchedWriteVariant<[
563 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
564 SchedVar<NoSchedPred, [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
566 def FalkorWr_STRro : SchedWriteVariant<[
567 SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
568 SchedVar<NoSchedPred, [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
570 //===----------------------------------------------------------------------===//
571 // Specialize the coarse model by associating instruction groups with the
572 // subtarget-defined types. As the model is refined, this will override most
573 // of the earlier mappings.
576 // -----------------------------------------------------------------------------
578 // FIXME: This could be better modeled by looking at the regclasses of the operands.
// Every COPY is modeled as one XYZ micro-op with 1-cycle latency, regardless
// of the register classes involved.
579 def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
581 // SIMD Floating-point Instructions
582 // -----------------------------------------------------------------------------
// Single-uop VXVY floating-point mappings, grouped by latency (1 to 5
// cycles). instregex matches instruction record names against the regex;
// instrs lists exact record names.
583 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>;
// 2-cycle: max/min, absolute compares, compares (incl. against zero), rounding.
585 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
586 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>;
587 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
588 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
589 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
// 3-cycle: across-vector max/min, add/sub/absolute-difference, pairwise add.
591 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
592 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>;
593 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
// 4-cycle: conversions.
595 def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
596 def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
597 def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
// 5-cycle multiply, using the FMUL32 write that FalkorReadFMA32 keys on.
599 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
600 (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
601 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
// Indexed 64-bit scalar multiply: 6-cycle FMUL64 write (one VXVY micro-op).
604 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
605 (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
606 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
// Mappings for the v2f64/v4f32 forms, which use the 2-uop VXVY write types,
// followed by divide/sqrt, narrowing converts and multiply-accumulate.
609 def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
611 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
612 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
613 def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>;
614 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
// v2f32 divide and square root use one VX plus one VY resource.
616 def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
617 def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
619 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
621 def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
622 def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>;
623 def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
// NOTE(review): v2f64 multiplies are mapped to the FMUL32 (5-cycle) write
// here, whereas FML(A|S)v2f64 below uses the FMUL64 (6-cycle) write --
// confirm against the hardware documentation.
625 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
626 (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
628 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
629 (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
// Narrowing conversions use the 3-uop VXVY write types.
631 def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
632 def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
// v2f64/v4f32 divide and square root: two VX plus two VY resources.
634 def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
635 def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
636 def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
637 def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
// Multiply-accumulate: the second list entry is the SchedReadAdvance that
// models forwarding from a preceding multiply write.
639 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
640 (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
641 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
642 (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
644 def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
645 (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
646 def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
647 (instregex "^FML(A|S)v1i64_indexed$")>;
648 def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
649 (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
650 def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
651 (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
653 // SIMD Integer Instructions
654 // -----------------------------------------------------------------------------
// Integer add/sub/negate and bitwise logic on the v1i64/v2i32/v4i16/v8i8
// forms (plus ADDPv2i64p): one VXVY micro-op, 1-cycle latency.
655 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
656 def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs ADDPv2i64p)>;
657 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
658 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
659 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
660 def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
// Integer ops on the v1i64/v2i32/v4i16/v8i8 and scalar forms: one VXVY
// micro-op, 2-cycle latency.
662 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
663 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>;
664 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
665 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>;
// The CM(EQ|GE|HS|GT|HI) forms are matched only by the dedicated pattern on
// the next record. Listing them here too would make two InstRW records match
// the same instructions, which TableGen reports as an overlapping InstRW.
666 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP)(v1i64|v2i32|v4i16|v8i8)$")>;
667 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
668 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
669 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
670 def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>;
671 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
672 def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>;
// One VXVY micro-op, 3-cycle latency: saturating arithmetic, rounding and
// accumulating shifts, shift-insert, narrowing shifts, small reductions.
674 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
675 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
676 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
677 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)ADDLVv4i16v$")>;
678 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
679 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
680 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
681 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
682 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
683 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
684 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
685 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>;
686 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
687 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
688 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
689 def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>;
690 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
691 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
692 def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
// One VXVY micro-op, 4-cycle latency: wider reductions and integer
// multiplies. The multiplies use the VMUL32 write that FalkorReadVMA keys on;
// SQRDML(A|S)H additionally reads through FalkorReadVMA.
694 def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
695 def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
696 def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
697 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
698 (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
699 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
700 (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
701 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
702 (instregex "^SQDMULL(i16|i32)$")>;
703 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
704 (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
// 16-byte across-vector max/min: one VXVY micro-op, 5-cycle latency.
706 def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
// Mappings to 2-uop VXVY writes: add reductions, narrowing high-half add/sub
// and absolute-difference-accumulate.
708 def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
710 def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
711 def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
712 def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
714 def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
// Saturating narrowing shifts and rounding narrowing add/sub: two VXVY
// micro-ops, 6-cycle latency. The suffix is anchored as "_shift$"; the former
// "_shift?$" made only the final 't' optional, which was a regex slip.
716 def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift$")>;
717 def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
// Integer mappings for the v16i8/v8i16/v4i32/v2i64 forms and the
// widening/long forms, which use the 2-, 3- and 4-uop VXVY write types.
// Two VXVY micro-ops, 1-cycle latency.
719 def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
720 def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
721 def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
722 def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
723 def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
// Two VXVY micro-ops, 2-cycle latency.
725 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
726 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
727 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
728 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
729 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
730 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
731 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
732 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
733 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
734 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
735 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>;
736 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
737 def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
// Two VXVY micro-ops, 3-cycle latency.
739 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
740 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
741 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
742 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
743 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
744 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
745 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
746 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
747 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
748 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>;
749 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
750 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
// Integer multiplies on the 2-uop VMUL32 write (4-cycle latency);
// SQRDML(A|S)H additionally reads through FalkorReadVMA.
752 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
753 (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
754 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
755 (instregex "^SQDMULLv.*$")>;
756 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
757 (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
// Long add reductions use the 3-uop VXVY writes.
759 def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
761 def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
763 def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
// Wide add/sub and absolute-difference-accumulate use the 4-uop VXVY writes.
765 def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
767 def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
769 def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
// Saturating doubling multiply-accumulate long: VMUL32 write plus the
// FalkorReadVMA forwarding read.
771 def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
772 (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
773 def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
774 (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
776 // SIMD Load Instructions
777 // -----------------------------------------------------------------------------
// SIMD load mappings. Every entry carries the FalkorReadIncLd read. Each
// _POST form lists FalkorWr_LdStInc_none_3cyc first, ahead of the same load
// write used by the matching non-_POST form (presumably covering the
// written-back base register -- confirm against the instruction operands).
// One LD micro-op, 3-cycle latency.
778 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
779 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
780 (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
781 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
782 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
783 (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
784 def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>;
785 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
786 (instrs LD2i64_POST)>;
// One LD plus one VXVY micro-op, 4-cycle latency (8/16/32-bit lane loads).
788 def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
789 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
790 (instregex "^LD1i(8|16|32)_POST$")>;
// FalkorWr_1LD_1none_3cyc: two-register loads of the 8b/4h/2s/1d forms.
792 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
793 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
794 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
795 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
796 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
797 (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
798 def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
799 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
800 (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
// Two LD micro-ops, 3-cycle latency: two-register loads of the 16b/8h/4s/2d
// forms and the 64-bit lane forms of LD3/LD4.
802 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
803 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
804 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
805 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
806 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
807 (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
808 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
809 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
810 (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
811 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>;
812 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
813 (instrs LD3i64_POST)>;
814 def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>;
815 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
816 (instrs LD4i64_POST)>;
// SIMD load mappings that use the 3-, 4- and 5-micro-op write types. As in
// the entries before these, each _POST form prepends
// FalkorWr_LdStInc_none_3cyc to the write list of its non-_POST counterpart,
// and every entry carries the FalkorReadIncLd read.
// LD2 lane loads (8/16/32-bit): FalkorWr_1LD_2VXVY_4cyc.
818 def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
819 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
820 (instregex "^LD2i(8|16|32)_POST$")>;
// Three-register loads of the 8b/4h/2s/1d forms: FalkorWr_2LD_1none_3cyc.
822 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
823 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
824 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
825 def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
826 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
827 (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
// Three-register loads of the 16b/8h/4s/2d forms: three LD micro-ops.
829 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
830 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
831 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
832 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>;
833 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
834 (instrs LD3Threev2d_POST)>;
835 def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
836 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
837 (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
// LD3 lane loads (8/16/32-bit): one LD plus three VXVY micro-ops.
839 def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
840 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
841 (instregex "^LD3i(8|16|32)_POST$")>;
// Four-register loads of the 8b/4h/2s/1d forms: FalkorWr_2LD_2none_3cyc.
843 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
844 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
845 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
846 def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
847 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
848 (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
// Four-register loads of the 16b/8h/4s/2d forms: four LD micro-ops.
850 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
851 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
852 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
853 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>;
854 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
855 (instrs LD4Fourv2d_POST)>;
856 def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
857 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
858 (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
// LD4 lane loads (8/16/32-bit): one LD plus four VXVY micro-ops.
860 def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
861 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
862 (instregex "^LD4i(8|16|32)_POST$")>;
864 def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
865 (instregex "^LD3Threev(8b|4h|2s)$")>;
866 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
867 (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
869 def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
870 (instregex "^LD4Fourv(8b|4h|2s)$")>;
871 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
872 (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
874 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
875 (instregex "^LD3Threev(16b|8h|4s)$")>;
877 def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
878 (instregex "^LD4Fourv(16b|8h|4s)$")>;
880 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
881 (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
883 def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
884 (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
// Most integer ALU opcodes map to FalkorWr_1XYZ_1cyc. ORR-immediate and the
// shifted/extended ADD/SUB forms use their own write types (FalkorWr_ORRi,
// FalkorWr_ADDSUBsx), which are declared elsewhere in this file.
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_ORRi],      (instregex "^ORR(W|X)ri$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_ADDSUBsx],  (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
def : InstRW<[FalkorWr_ADDSUBsx],  (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
// SIMD Miscellaneous Instructions
// -----------------------------------------------------------------------------
// Records are grouped by write type. Following the naming convention described
// at the top of this file, a write-type name encodes the micro-op count/type
// and the latency.

// Single micro-op, 0-1 cycle.
def : InstRW<[FalkorWr_1GTOV_1cyc],
             (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^CPY(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_1GTOV_1cyc],
             (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc],
             (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^(BIF|BIT|BSL)v8i8$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instrs EXTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_0cyc],
             (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instrs TBLv8i8One)>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instrs NOTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^REV(16|32|64)v.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;

// Single micro-op, 2 cycles.
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;

// Single micro-op, 3 cycles.
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instregex "(S|U)QXTU?Nv.*$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instrs FRECPXv1i32, FRECPXv1i64)>;
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instrs URECPEv2i32, URSQRTEv2i32)>;

// Reciprocal / reciprocal-sqrt step, scalar and 64-bit vector forms.
def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
             (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;

def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
             (instrs FRECPS64, FRSQRTS64)>;

// Two micro-ops, 0-2 cycles.
def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
             (instregex "^INSv(i32|i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2GTOV_1cyc],
             (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc],
             (instregex "^(BIF|BIT|BSL)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc],
             (instrs EXTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_0cyc],
             (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_2VXVY_1cyc],
             (instrs NOTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc],
             (instrs TBLv16i8One)>;

def : InstRW<[FalkorWr_2VXVY_2cyc],
             (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;

// Two micro-ops, 3 cycles.
def : InstRW<[FalkorWr_2VXVY_3cyc],
             (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
def : InstRW<[FalkorWr_2VXVY_3cyc],
             (instrs URECPEv4i32, URSQRTEv4i32)>;

// Two micro-ops, 4 cycles.
def : InstRW<[FalkorWr_2VXVY_4cyc],
             (instrs TBLv8i8Two)>;
def : InstRW<[FalkorWr_2VXVY_4cyc],
             (instregex "^TBX(v8|v16)i8One$")>;

// Reciprocal / reciprocal-sqrt step, 128-bit vector forms.
def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
             (instrs FRECPSv4f32, FRSQRTSv4f32)>;

def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
             (instrs FRECPSv2f64, FRSQRTSv2f64)>;

// Multi-register table lookups: three, four and five micro-ops.
def : InstRW<[FalkorWr_3VXVY_5cyc],
             (instregex "^TBL(v8i8Three|v16i8Two)$")>;
def : InstRW<[FalkorWr_3VXVY_5cyc],
             (instregex "^TBX(v8i8Two|v16i8Two)$")>;

def : InstRW<[FalkorWr_4VXVY_6cyc],
             (instregex "^TBL(v8i8Four|v16i8Three)$")>;
def : InstRW<[FalkorWr_4VXVY_6cyc],
             (instregex "^TBX(v8i8Three|v16i8Three)$")>;

def : InstRW<[FalkorWr_5VXVY_7cyc],
             (instrs TBLv16i8Four)>;
def : InstRW<[FalkorWr_5VXVY_7cyc],
             (instregex "^TBX(v8i8Four|v16i8Four)$")>;
// SIMD Store Instructions
// -----------------------------------------------------------------------------
// Scalar and pair FP/SIMD register stores. Each record lists one ReadDefault
// per stored register followed by FalkorReadIncSt; pre/post-indexed forms
// additionally list FalkorWr_LdStInc_none_3cyc as their first write.

def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(Q|D|S|H|B)ui$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(D|S|H|B)ro(W|X)$")>;
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STPQi$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STPQ(post|pre)$")>;
def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STP(D|S)(i)$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STP(D|S)(post|pre)$")>;
def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
             (instregex "^STRQro(W|X)$")>;
def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STUR(Q|D|S|B|H)i$")>;
// Non-temporal pair stores: D/S pairs use the single VSD/ST write, the Q pair
// uses the doubled write, mirroring the STP(D|S)i / STPQi records above.
def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instrs STNPDi, STNPSi)>;
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instrs STNPQi)>;
// ST1..ST4 structure stores. Non-POST records carry a single store write;
// _POST records prepend either FalkorWr_LdStInc_none_3cyc or
// FalkorWr_1XYZ_1cyc ahead of that same store write.

// One VSD + one ST.
def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;

// Two VSD + two ST.
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3(i8|i16|i32|i64)$")>;
def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4(i8|i16|i32|i64)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4(i8|i16|i32|i64)_POST$")>;

// ST3, 64-bit arrangements.
def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3Three(v8b|v4h|v2s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;

// Three VSD + three ST.
def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instrs ST3Threev2d)>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instrs ST3Threev2d_POST)>;

// ST4, 64-bit arrangements.
def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4Four(v8b|v4h|v2s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;

// Four VSD + four ST.
def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instrs ST4Fourv2d)>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instrs ST4Fourv2d_POST)>;

// ST3, 128-bit arrangements.
def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3Three(v16b|v8h|v4s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;

// ST4, 128-bit arrangements.
def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4Four(v16b|v8h|v4s)$")>;
// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
// Branch Instructions
// -----------------------------------------------------------------------------
// Every branch record in this section uses a 0cyc write type; the records
// differ only in which pipe designator the write type names.
def : InstRW<[FalkorWr_1none_0cyc],  (instrs B, TCRETURNdi)>;
def : InstRW<[FalkorWr_1Z_0cyc],     (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
def : InstRW<[FalkorWr_1Z_0cyc],     (instrs RET_ReallyLR, TCRETURNri)>;
def : InstRW<[FalkorWr_1ZB_0cyc],    (instrs Bcc)>;
def : InstRW<[FalkorWr_1XYZB_0cyc],  (instrs BL)>;
def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
// Cryptography Extensions
// -----------------------------------------------------------------------------
// AES and SHA1/SHA256 opcodes, each mapped to a vector-pipe write type.
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instrs SHA1Hrr)>;
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instrs AESIMCrr, AESMCrr)>;
def : InstRW<[FalkorWr_2VXVY_3cyc],
             (instrs AESDrr, AESErr)>;
def : InstRW<[FalkorWr_2VXVY_2cyc],
             (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
def : InstRW<[FalkorWr_1VX_1VY_4cyc],
             (instregex "^SHA1(C|M|P)rrr$")>;
def : InstRW<[FalkorWr_1VX_1VY_5cyc],
             (instrs SHA256H2rrr, SHA256Hrrr)>;
def : InstRW<[FalkorWr_4VXVY_3cyc],
             (instrs SHA256SU1rrr)>;
// FP Load Instructions
// -----------------------------------------------------------------------------
// Single-register loads list one write; pair loads list a second write
// (FalkorWr_none_3cyc) for the second destination. Pre/post-indexed forms
// additionally list FalkorWr_LdStInc_none_3cyc as their first write.
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDUR(Q|D|S|H|B)i$")>;
def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
             (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
// Q-register pair loads (non-temporal and regular immediate-offset forms) use
// the same two-LD write as the LDPQ pre/post record at the end of this section.
def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instrs LDNPQi)>;
def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instrs LDPQi)>;
def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "LDNP(D|S)i$")>;
def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "LDP(D|S)i$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "LDP(D|S)(pre|post)$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "^LDPQ(pre|post)$")>;
// FP Data Processing Instructions
// -----------------------------------------------------------------------------
// Scalar floating-point compare, convert, arithmetic and fused multiply-add
// opcodes, ordered by the latency encoded in the write-type name.

// 1 cycle.
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^FCCMP(E)?(S|D)rr$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc],
             (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^(FABS|FNEG)(S|D)r$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc],
             (instregex "^FCSEL(S|D)rrr$")>;

// 2 cycles.
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instrs FCVTSHr, FCVTDHr)>;
def : InstRW<[FalkorWr_1VXVY_2cyc],
             (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;

// 3 cycles.
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instregex "^FABD(32|64)$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instregex "^(FADD|FSUB)(S|D)rr$")>;
def : InstRW<[FalkorWr_1VXVY_3cyc],
             (instrs FCVTHSr, FCVTHDr)>;

// 4 cycles.
def : InstRW<[FalkorWr_1VXVY_4cyc],
             (instrs FCVTSDr, FCVTDSr)>;

// Multiplies.
def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
             (instregex "^F(N)?MULSrr$")>;

def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
             (instregex "^F(N)?MULDrr$")>;

// Divide and square root.
def : InstRW<[FalkorWr_1VX_1VY_10cyc],
             (instrs FDIVSrr)>;
def : InstRW<[FalkorWr_1VX_1VY_14cyc],
             (instrs FDIVDrr)>;
def : InstRW<[FalkorWr_1VX_1VY_12cyc],
             (instrs FSQRTSr)>;
def : InstRW<[FalkorWr_1VX_1VY_21cyc],
             (instrs FSQRTDr)>;

// Fused multiply-add/sub: two ReadDefault entries, then a FalkorReadFMA32/64
// read as the third read entry.
def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
             (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
             (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
// FMOV variants, FP<->integer conversions, and integer-to-FP converts.
def : InstRW<[FalkorWr_FMOV],
             (instregex "^FMOV(WS|XD|XDHigh)r$")>;
def : InstRW<[FalkorWr_1GTOV_0cyc],
             (instregex "^FMOV(S|D)i$")>; // imm fwd
def : InstRW<[FalkorWr_1VTOG_1cyc],
             (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc],
             (instregex "^FCVTZ(S|U)(d|s)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc],
             (instregex "^FMOV(SW|DX|DXHigh)r$")>;
def : InstRW<[FalkorWr_1VXVY_0cyc],
             (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
def : InstRW<[FalkorWr_2VXVY_0cyc],
             (instrs FMOVD0, FMOVS0)>; // imm fwd

// Integer-to-FP converts. The two vector patterns below have no trailing '$'.
def : InstRW<[FalkorWr_1GTOV_4cyc],
             (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc],
             (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;

def : InstRW<[FalkorWr_2VXVY_4cyc],
             (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
// Load Instructions
// -----------------------------------------------------------------------------
// Integer loads and prefetches. Plain loads use FalkorWr_1LD_3cyc and the
// sign-extending LDRS*/LDPSW forms use FalkorWr_1LD_4cyc. Pair loads list a
// second FalkorWr_none_* write for the second destination, and pre/post-indexed
// forms additionally list FalkorWr_LdStInc_none_3cyc as their first write.
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "^LDNP(W|X)i$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "^LDP(W|X)i$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "^LDP(W|X)(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDR(BB|HH|W|X)ui$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
             (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDR(W|X)l$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDTR(B|H|W|X)i$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^LDUR(BB|HH|W|X)i$")>;
def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
// Immediate-offset LDPSW; its pre/post-indexed siblings follow.
def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
             (instrs LDPSWi)>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
             (instregex "^LDPSW(post|pre)$")>;
def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
             (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
             (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
             (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
// PC-relative literal form of LDRSW.
def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
             (instrs LDRSWl)>;
def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
             (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
             (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
// Miscellaneous Data-Processing Instructions
// -----------------------------------------------------------------------------
// Bitfield moves, CRC32, bit/byte reversal and counting, and EXTR.
def : InstRW<[FalkorWr_1XYZ_1cyc],
             (instregex "^(S|U)?BFM(W|X)ri$")>;
def : InstRW<[FalkorWr_1X_2cyc],
             (instregex "^CRC32.*$")>;
def : InstRW<[FalkorWr_1XYZ_2cyc],
             (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
def : InstRW<[FalkorWr_2XYZ_2cyc],
             (instregex "^EXTR(W|X)rri$")>;
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
// Integer multiply-accumulate records list two ReadDefault entries followed by
// a FalkorReadIMA32/64 read as the third read entry.
def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
             (instregex "^M(ADD|SUB)Wrrr$")>;

def : InstRW<[FalkorWr_IMUL64_1X_5cyc],
             (instregex "^(S|U)MULHrr$")>;
def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
             (instregex "^M(ADD|SUB)Xrrr$")>;

// Integer divides.
def : InstRW<[FalkorWr_1X_1Z_8cyc],
             (instregex "^(S|U)DIVWr$")>;
def : InstRW<[FalkorWr_1X_1Z_11cyc],
             (instregex "^(S|U)DIVXr$")>;

// Vector widening multiplies; the accumulate forms add a FalkorReadVMA read.
def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
             (instregex "^(S|U)MULLv.*$")>;
def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
             (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
// Move and Shift Instructions
// -----------------------------------------------------------------------------
// Variable shifts, immediate moves, and address-materialization pseudos.
// Records tagged "imm fwd" use a 0cyc write type.
def : InstRW<[FalkorWr_1XYZ_1cyc],  (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_0cyc],  (instregex "^MOVK(W|X)i$")>; // imm fwd
def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd
def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd
def : InstRW<[FalkorWr_MOVZ],       (instregex "^MOVZ(W|X)i$")>;
def : InstRW<[FalkorWr_1XYZ_0cyc],  (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
// The MOVaddr* pseudos are modelled as a sequence of two 1XYZ writes.
def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
             (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
// LOADgot is modelled as a load write followed by a 1XYZ write.
def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
             (instrs LOADgot)>;
// Other Instructions
// -----------------------------------------------------------------------------
// Barriers, system instructions, exception generation, and the
// exclusive / acquire-release load and store forms.
def : InstRW<[FalkorWr_1LD_0cyc],
             (instrs CLREX, DMB, DSB)>;
def : InstRW<[FalkorWr_1none_0cyc],
             (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
def : InstRW<[FalkorWr_1ST_0cyc],
             (instrs SYSxt, SYSLxt)>;
def : InstRW<[FalkorWr_1Z_0cyc],
             (instrs MSRpstateImm1, MSRpstateImm4)>;

// Acquire / exclusive loads; the pair forms list a second FalkorWr_none_3cyc
// write.
def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
             (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
             (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
def : InstRW<[FalkorWr_1LD_3cyc],
             (instrs MRS, MOVbaseTLS)>;

def : InstRW<[FalkorWr_1LD_1Z_3cyc],
             (instrs DRPS)>;

def : InstRW<[FalkorWr_1SD_1ST_0cyc],
             (instrs MSR)>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instrs STNPWi, STNPXi)>;
def : InstRW<[FalkorWr_2LD_1Z_3cyc],
             (instrs ERET)>;

// Release / exclusive stores.
def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc],
             (instregex "^LDC.*$")>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STLR(B|H|W|X)$")>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STXP(W|X)$")>;
def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STXR(B|H|W|X)$")>;

def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STLXP(W|X)$")>;
def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STLXR(B|H|W|X)$")>;
// Store Instructions
// -----------------------------------------------------------------------------
// Integer register stores. Each record lists one ReadDefault per stored
// register followed by FalkorReadIncSt; pre/post-indexed forms additionally
// list FalkorWr_LdStInc_none_3cyc as their first write.

// Pair stores.
def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STP(W|X)i$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
             (instregex "^STP(W|X)(post|pre)$")>;

// Single-register stores.
def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(BB|HH|W|X)ui$")>;
def : InstRW<[FalkorWr_LdStInc_none_3cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
             (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STTR(B|H|W|X)i$")>;
def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
             (instregex "^STUR(BB|HH|W|X)i$")>;