contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM55.td

   1 //==- ARMScheduleM55.td - Arm Cortex-M55 Scheduling Definitions -*- tablegen -*-=//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file defines the scheduling model for the Arm Cortex-M55 processors.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 // ===---------------------------------------------------------------------===//
  14 // Cortex-M55 is a lot like the M4/M33 in terms of scheduling. It technically
  15 // has an extra pipeline stage but that is unimportant for scheduling, just
   16 // starting our model a stage later. The main points of interest over a
  17 // Cortex-M4 are MVE instructions and the ability to dual issue thumb1
  18 // instructions.
  19 //
  20 //
  21 // MVE
  22 //
  23 // The EPU pipelines now include both MVE and FP instructions. It has four
  24 // pipelines across 4 stages (E1-E4). These pipelines are "control",
  25 // "load/store", "integer" and "float/mul". We start the schedule at E2 to line
  26 // up with the rest of the pipeline we model, and take the latency as the time
  27 // between reading registers (almost always in E2) and register write (or
   28 // forward, if it allows it). This means that a lot of instructions (including
  29 // loads) actually take 1 cycle (amazingly).
  30 //
  31 // Each MVE instruction needs to take 2 beats, each performing 64bits of the
  32 // 128bit vector operation. So long as the beats are to different pipelines,
  33 // the execution of the first-beat-of-the-second-instruction can overlap with
  34 // the second-beat-of-the-first. For example a sequence of VLDR;VADD;VMUL;VSTR
   35 // can look like this in a pipeline:
  36 //          1    2    3    4    5
  37 // LD/ST  : VLDR VLDR      VSTR VSTR
  38 // INTEGER:      VADD VADD
  39 // FP/MUL :           VMUL VMUL
  40 //
  41 // But a sequence of VLDR;VLDRB;VADD;VSTR because the loads cannot overlap,
  42 // looks like:
  43 //          1     2     3     4     5    6
  44 // LD/ST  : VLDR  VLDR  VLDRB VLDRB VSTR VSTR
  45 // INTEGER:                   VADD  VADD
  46 //
  47 // For this schedule, we currently model latencies and pipelines well for each
   48 // instruction. MVE instructions take two beats, modelled using
  49 // ResourceCycles=[2].
  50 //
  51 //
  52 // Dual Issue
  53 //
  54 // Cortex-M55 can dual issue two 16-bit T1 instructions providing one is one of
  55 // NOPs, ITs, Brs, ADDri/SUBri, UXTB/H, SXTB/H and MOVri's. NOPs and IT's are
  56 // not relevant (they will not appear when scheduling), Brs are only at the end
  57 // of the block. The others are more useful, and where the problems arise.
  58 //
  59 // The first problem comes from the fact that we will only be seeing Thumb2
  60 // instructions at the point in the pipeline where we do the scheduling. The
  61 // Thumb2SizeReductionPass has not been run yet. Especially pre-ra scheduling
  62 // (where the scheduler has the most freedom) we can only really guess at which
  63 // instructions will become thumb1 instructions. We are quite optimistic, and
  64 // may get some things wrong as a result.
  65 //
  66 // The other problem is one of telling llvm what to do exactly. The way we
   67 // attempt to model this is:
  68 //  Set IssueWidth to 2 to allow 2 instructions per cycle.
  69 //  All instructions we cannot dual issue are "SingleIssue=1" (MVE/FP and T2
  70 //    instructions)
   71 //  We guess at another set of instructions that will become T1 instructions.
  72 //    These become the primary instruction in a dual issue pair (the normal
  73 //    one). These use normal resources and latencies, but set SingleIssue = 0.
   74 //  We guess at another set of instructions that will be shrunk down into T1 DI
  75 //    instructions (add, sub, mov's, etc), which become the secondary. These
  76 //    don't use a resource, and set SingleIssue = 0.
  77 //
  78 // So our guessing is a bit rough. It may be possible to improve this by moving
  79 // T2SizeReduction pass earlier in the pipeline, for example, so that at least
  80 // Post-RA scheduling sees what is T1/T2. It may also be possible to write a
   81 // custom instruction matcher to more accurately guess at T1 instructions.
  82
  83
   84 def CortexM55Model : SchedMachineModel {
   85   let MicroOpBufferSize = 0;      // Explicitly set to zero since M55 is in-order.
   86   let IssueWidth = 2;             // There is some dual-issue support in M55.
   87   let MispredictPenalty = 3;      // Default is 10
   88   let LoadLatency = 4;            // Default is 4
   89   let PostRAScheduler = 1;        // Also run the scheduler after register allocation.
   90   let FullInstRWOverlapCheck = 1;
   91
   92   let CompleteModel = 0;          // Not every instruction has scheduling info here.
   93   let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasMatMulInt8, HasZCZ,
   94                              IsNotMClass, HasV8, HasV8_3a, HasTrustZone, HasDFB,
   95                              IsWindows];
   96 }
  97
  98
  99 let SchedModel = CortexM55Model in {
 100
 101 //===----------------------------------------------------------------------===//
 102 // Define each kind of processor resource and number available.
 103
  104 // Modeling each pipeline as a ProcResource using the BufferSize = 0 since
  105 // M55 is in-order.
  106 def M55UnitALU : ProcResource<1> { let BufferSize = 0; } // Int ALU
  107 def M55UnitVecALU : ProcResource<1> { let BufferSize = 0; } // MVE integer pipe
  108 def M55UnitVecFPALU : ProcResource<1> { let BufferSize = 0; } // MVE float pipe
  109 def M55UnitLoadStore : ProcResource<1> { let BufferSize = 0; } // MVE load/store pipe
  110 def M55UnitVecSys : ProcResource<1> { let BufferSize = 0; } // MVE control/sys pipe
  111
  112 // Some VMOV's can go down either vector pipeline (integer or FP). FIXME:
  113 // M55Write2IntFPE2 is intended to model the VMOV taking either Int or FP for
  114 // 2 cycles. It is unclear whether the llvm scheduler uses it as we want.
  115 def M55UnitVecIntFP: ProcResGroup<[M55UnitVecALU, M55UnitVecFPALU]>;
 116
 117
 118 //===----------------------------------------------------------------------===//
 119 // Subtarget-specific SchedWrite types which both map the ProcResources and
 120 // set the latency.
 121
 122 //=====//
 123 // ALU //
 124 //=====//
 125
  126 // Generic writes for Flags, GPRs and other extra operands (eg post-inc, vadc flags, vaddlv etc). They use no resource and no micro-ops.
  127 def M55WriteLat0  : SchedWriteRes<[]>  { let Latency = 0; let NumMicroOps = 0; }
  128 def M55WriteLat1  : SchedWriteRes<[]>  { let Latency = 1; let NumMicroOps = 0; }
  129 def M55WriteLat2  : SchedWriteRes<[]>  { let Latency = 2; let NumMicroOps = 0; }
 130
  131 // DX instructions are ALU instructions that take a single cycle. The
  132 // instructions that may be shrunk to T1 (and can be dual issued) are
  133 // SingleIssue = 0. The others are SingleIssue = 1.
  134 let SingleIssue = 0, Latency = 1 in {
  135     def : WriteRes<WriteALU, [M55UnitALU]>;
  136     def : WriteRes<WriteCMP, [M55UnitALU]>;
  137     def : WriteRes<WriteBr, [M55UnitALU]>;
  138     def : WriteRes<WriteBrL, [M55UnitALU]>;
  139     def : WriteRes<WriteBrTbl, [M55UnitALU]>;
  140     def : WriteRes<WriteST, [M55UnitALU]>;
  141     def M55WriteDX_DI : SchedWriteRes<[M55UnitALU]>; // 1 cycle, may dual issue
  142 }
  143 let SingleIssue = 1, Latency = 1 in {
  144     def : WriteRes<WritePreLd, [M55UnitALU]>;
  145     def M55WriteDX_SI : SchedWriteRes<[M55UnitALU]>; // 1 cycle, single issue only
  146 }
 147
  148 def : InstRW<[M55WriteDX_SI], (instregex "t2BF[CI]", "t2CPS", "t2DBG", // 1 cycle, never dual issued
  149           "t2MRS", "t2MSR", "t2SEL", "t2SG", "t2TT")>;
  150 def : InstRW<[M55WriteDX_SI], (instregex "t2SUBS_PC_LR", "COPY")>;
  151 def : InstRW<[M55WriteDX_SI], (instregex "t2CS(EL|INC|INV|NEG)")>;
 152 // Thumb 2 instructions that could be reduced to a thumb 1 instruction and can
 153 // be dual issued with one of the above. This list is optimistic.
 154 def : InstRW<[M55WriteDX_DI], (instregex "t2ADDC?rr$", "t2ADDrr$",
 155            "t2ADDSrr$", "t2ANDrr$", "t2ASRr[ir]$", "t2BICrr$", "t2CMNzrr$",
 156            "t2CMPr[ir]$", "t2EORrr$", "t2LSLr[ir]$", "t2LSRr[ir]$", "t2MVNr$",
 157            "t2ORRrr$", "t2REV(16|SH)?$", "t2RORrr$", "t2RSBr[ir]$", "t2RSBSri$",
 158            "t2SBCrr$", "t2SUBS?rr$", "t2TEQrr$", "t2TSTrr$", "t2STRi12$",
 159            "t2STRs$", "t2STRBi12$", "t2STRBs$", "t2STRHi12$", "t2STRHs$",
 160            "t2STR_POST$", "t2STMIA$", "t2STMIA_UPD$", "t2STMDB$", "t2STMDB_UPD$")>;
  161 def : InstRW<[M55WriteDX_DI], (instregex "t2SETPAN$", "tADC$", "tADDhirr$", // 1 cycle, may dual issue
  162            "tADDrSP$", "tADDrSPi$", "tADDrr$", "tADDspi$", "tADDspr$", "tADR$",
  163            "tAND$", "tASRri$", "tASRrr$", "tBIC$", "tBKPT$", "tCBNZ$", "tCBZ$",
  164            "tCMNz$", "tCMPhir$", "tCMPi8$", "tCMPr$", "tCPS$", "tEOR$", "tHINT$",
  165            "tHLT$", "tLSLri$", "tLSLrr$", "tLSRri$", "tLSRrr$", "tMOVSr$",
  166            "tMUL$", "tMVN$", "tORR$", "tPICADD$", "tPOP$", "tPUSH$", "tREV$",
  167            "tREV16$", "tREVSH$", "tROR$", "tRSB$", "tSBC$", "tSETEND$",
  168            "tSTMIA_UPD$", "tSTRBi$", "tSTRBr$", "tSTRHi$", "tSTRHr$", "tSTRi$",
  169            "tSTRr$", "tSTRspi$", "tSUBrr$", "tSUBspi$", "tSVC$", "tTRAP$",
  170            "tTST$", "tUDF$")>;
  171 def : InstRW<[M55WriteDX_DI], (instregex "tB$", "tBLXNSr$", "tBLXr$", "tBX$", // branch forms
  172            "tBXNS$", "tBcc$")>;
 173
 174
  175 // CX instructions take 2 (or more) cycles. Again T1 instructions may be dual
  176 // issued (SingleIssue = 0)
  177 let SingleIssue = 0, Latency = 2 in {
  178     def : WriteRes<WriteLd, [M55UnitALU]>;
  179     def M55WriteCX_DI  : SchedWriteRes<[M55UnitALU]>; // 2 cycles, may dual issue
  180 }
  181 let SingleIssue = 1, Latency = 2 in {
  182     def : WriteRes<WriteALUsi, [M55UnitALU]>;
  183     def : WriteRes<WriteALUsr, [M55UnitALU]>;
  184     def : WriteRes<WriteALUSsr, [M55UnitALU]>;
  185     def : WriteRes<WriteCMPsi, [M55UnitALU]>;
  186     def : WriteRes<WriteCMPsr, [M55UnitALU]>;
  187     def : WriteRes<WriteDIV, [M55UnitALU]>;
  188     def M55WriteCX_SI  : SchedWriteRes<[M55UnitALU]>; // 2 cycles, single issue only
  189 }
 190
  191 def : SchedAlias<WriteMUL16, M55WriteCX_SI>; // Multiplies and MACs map onto M55WriteCX_SI
  192 def : SchedAlias<WriteMUL32, M55WriteCX_SI>;
  193 def : SchedAlias<WriteMUL64Lo, M55WriteCX_SI>;
  194 def : WriteRes<WriteMUL64Hi, []> { let Latency = 2; } // Hi half: latency only, no resource
  195 def : SchedAlias<WriteMAC16, M55WriteCX_SI>;
  196 def : SchedAlias<WriteMAC32, M55WriteCX_SI>;
  197 def : SchedAlias<WriteMAC64Lo, M55WriteCX_SI>;
  198 def : WriteRes<WriteMAC64Hi, []> { let Latency = 2; } // Hi half: latency only, no resource
 199
  200 def : InstRW<[M55WriteCX_SI], (instregex "t2CDP", "t2CLREX", "t2[DI][MS]B", // 2 cycles, never dual issued
  201            "t2MCR", "t2MOVSs[ir]", "t2MRC", "t2MUL", "t2STC")>;
  202 def : InstRW<[M55WriteCX_SI], (instregex "t2Q", "t2[SU](ADD|ASX|BFX|DIV)",
  203            "t2[SU]H(ADD|ASX|SUB|SAX)", "t2SM[LM]", "t2S(SAT|SUB|SAX)", "t2UQ",
  204            "t2USA", "t2USUB", "t2UXTA[BH]")>;
  205 def : InstRW<[M55WriteCX_SI], (instregex "t2LD[AC]", "t2STL", "t2STRD")>;
  206 def : InstRW<[M55WriteCX_SI], (instregex "MVE_[SU]Q?R?SH[LR]$")>;
  207 def : InstRW<[M55WriteCX_SI, M55WriteLat2], (instregex "MVE_ASRL", "MVE_LSLL", // second def uses M55WriteLat2
  208             "MVE_LSRL", "MVE_[SU]Q?R?SH[LR]L")>;
  209 // t2CLRM may take longer in practice, but that likely doesn't make a
  210 // difference for scheduling
  211 def : InstRW<[M55WriteCX_SI], (instregex "t2CLRM")>;
 212
  213 def : InstRW<[M55WriteCX_DI], (instregex "t2LDR[BH]?i12$", "t2LDRS?[BH]?s$", // 2-cycle loads that may dual issue
  214            "t2LDM")>;
  215 def : InstRW<[M55WriteCX_DI], (instregex "tLDM", "tLDRBi$", "tLDRBr$",
  216            "tLDRHi$", "tLDRHr$", "tLDRSB$", "tLDRSH$", "tLDRi$", "tLDRpci$",
  217            "tLDRr$", "tLDRspi$")>;
 218
  219 // Dual Issue instructions. These writes list no resource and SingleIssue = 0.
  220 let Latency = 1, SingleIssue = 0 in {
  221     def : WriteRes<WriteNoop, []>;
  222     def M55WriteDI : SchedWriteRes<[]>;
  223 }
  224
  225 def : InstRW<[M55WriteDI], (instregex "tADDi[38]$", "tSUBi[38]$", "tMOVi8$",
  226            "tMOVr$", "tUXT[BH]$", "tSXT[BH]$")>;
  227 // Thumb 2 instructions that could be reduced to a dual issuable Thumb 1
  228 // instruction above.
  229 def : InstRW<[M55WriteDI], (instregex "t2ADDS?ri$", "t2MOV[ir]$", "t2MOVi16$",
  230            "t2MOVr$", "t2SUBS?ri$", "t2[US]XT[BH]$")>;
  231 def : InstRW<[M55WriteDI], (instregex "t2IT", "IT")>;
 232
 233
  234 def : InstRW<[M55WriteLat0], (instregex "t2LoopDec")>; // Latency 0, no resource, no micro-ops
 235
  236 // Forwarding
  237
  238 // No forwarding in the ALU normally, so every scalar read advance is 0 cycles.
  239 def : ReadAdvance<ReadALU, 0>;
  240 def : ReadAdvance<ReadALUsr, 0>;
  241 def : ReadAdvance<ReadMUL, 0>;
  242 def : ReadAdvance<ReadMAC, 0>;
 243
 244 //=============//
 245 // MVE and VFP //
 246 //=============//
 247
  248 // The Writes that take ResourceCycles=[2] are MVE instructions, the others VFP.
  249
  250 let SingleIssue = 1, Latency = 1 in {
  251   def M55WriteLSE2 : SchedWriteRes<[M55UnitLoadStore]>;
  252   def M55WriteIntE2 : SchedWriteRes<[M55UnitVecALU]>;
  253   def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
  254   def M55WriteSysE2 : SchedWriteRes<[M55UnitVecSys]>;
  255
  256   def M55Write2LSE2 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; }
  257   def M55Write2IntE2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
  258   def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
  259   def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]> { let ResourceCycles=[2]; } // either vector pipe
  260 }
  261
  262 let SingleIssue = 1, Latency = 2 in {
  263   def M55WriteLSE3 : SchedWriteRes<[M55UnitLoadStore]>;
  264   def M55WriteIntE3 : SchedWriteRes<[M55UnitVecALU]>;
  265   def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
  266
  267   def M55Write2LSE3 : SchedWriteRes<[M55UnitLoadStore]> { let ResourceCycles=[2]; }
  268   def M55Write2IntE3 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
  269   def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
  270 }
 271
  272 let SingleIssue = 1, Latency = 3 in { // Every "E3PlusN" write below has Latency = N + 2
  273   def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
  274
  275   // Same as M55Write2IntE3/M55Write2FloatE3 above, but longer latency and no forwarding into stores
  276   def M55Write2IntE4NoFwd : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
  277   def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]> { let ResourceCycles=[2]; }
  278 }
  279 let SingleIssue = 1, Latency = 4 in {
  280   def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]> { let ResourceCycles=[2]; }
  281   def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>;
  282 }
  283 let SingleIssue = 1, Latency = 9 in {
  284   def M55WriteFloatE3Plus7 : SchedWriteRes<[M55UnitVecFPALU]>;
  285 }
  286 let SingleIssue = 1, Latency = 15 in {
  287   def M55WriteFloatE3Plus13 : SchedWriteRes<[M55UnitVecFPALU]>;
  288 }
  289 let SingleIssue = 1, Latency = 16 in {
  290   def M55WriteFloatE3Plus14 : SchedWriteRes<[M55UnitVecFPALU]>;
  291 }
  292 let SingleIssue = 1, Latency = 21 in {
  293   def M55WriteFloatE3Plus19 : SchedWriteRes<[M55UnitVecFPALU]>;
  294 }
  295 // Used by the double-precision "VFN?M(A|S)D" (VFMA-family) InstRW in this file.
  296 let SingleIssue = 1, Latency = 24 in {
  297   def M55WriteFloatE3Plus22 : SchedWriteRes<[M55UnitVecFPALU]>;
  298 }
  299 let SingleIssue = 1, Latency = 30 in {
  300   def M55WriteFloatE3Plus28 : SchedWriteRes<[M55UnitVecFPALU]>;
  301 }
  302 let SingleIssue = 1, Latency = 36 in { // NOTE(review): VMUL (Double precision) + VADD (Double precision) = 21 + 15; confirm
  303   def M55WriteFloatE3Plus34 : SchedWriteRes<[M55UnitVecFPALU]>;
  304 }
 305
  306 def M55Read0 : SchedReadAdvance<0>; // No forwarding
  307 def M55Read1 : SchedReadAdvance<1, [M55Write2LSE3, M55Write2IntE3, M55Write2FloatE3]>; // 1 cycle advance, only from the listed E3 MVE writes
  308 def M55GatherQRead : SchedReadAdvance<-4>; // Negative advance: effectively adds 4 cycles to the producer's latency
 309
 310 // MVE instructions
 311
  312 // Loads and Stores of different kinds. All occupy M55UnitLoadStore for 2 cycles.
  313
  314 // Normal loads
  315 def : InstRW<[M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)$")>;
  316 // Pre/post inc loads (M55WriteLat1 covers the extra def)
  317 def : InstRW<[M55WriteLat1, M55Write2LSE2], (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)_(post|pre)$")>;
  318 // Gather loads (latency 2; the final read operand uses M55GatherQRead)
  319 def : InstRW<[M55Write2LSE3, M55Read0, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_rq")>;
  320 def : InstRW<[M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_qi$")>;
  321 def : InstRW<[M55WriteLat1, M55Write2LSE3, M55GatherQRead], (instregex "MVE_VLDR(W|D)U(32|64)_qi_pre$")>;
  322 // Interleaving loads
  323 def : InstRW<[M55Write2LSE2], (instregex "MVE_VLD[24][0-3]_(8|16|32)$")>;
  324 // Interleaving loads with wb
  325 def : InstRW<[M55Write2LSE2, M55WriteLat1], (instregex "MVE_VLD[24][0-3]_(8|16|32)_wb$")>;
  326
  327 // Normal stores
  328 def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)$")>;
  329 // Pre/post inc stores
  330 def : InstRW<[M55Write2LSE2, M55Read1], (instregex "MVE_VSTR(B|H|W)U?(8|16|32)_(post|pre)$")>;
  331 // Scatter stores (the final read operand uses M55GatherQRead)
  332 def : InstRW<[M55Write2LSE2, M55Read0, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_rq")>;
  333 def : InstRW<[M55Write2LSE2, M55Read0, M55GatherQRead], (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_qi")>;
  334 // Interleaving stores
  335 def : InstRW<[M55Write2LSE2], (instregex "MVE_VST(2|4)")>;
 336
  337 // Integer pipe operations. All use M55Write2Int* writes (M55UnitVecALU, 2 cycles).
  338
  339 def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_VABAV")>;
  340 def : InstRW<[M55Write2IntE2], (instregex "MVE_VABD(u|s)")>;
  341 def : InstRW<[M55Write2IntE2], (instregex "MVE_VABS(u|s)")>;
  342 def : InstRW<[M55Write2IntE3], (instregex "MVE_VADC")>;
  343 def : InstRW<[M55Write2IntE2], (instregex "MVE_VADD(_qr_)?i")>;
  344 def : InstRW<[M55Write2IntE2], (instregex "MVE_VAND")>;
  345 def : InstRW<[M55Write2IntE2], (instregex "MVE_VBIC")>;
  346 def : InstRW<[M55Write2IntE2], (instregex "MVE_VBRSR")>;
  347 def : InstRW<[M55Write2IntE2], (instregex "MVE_VCADDi")>;
  348 def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLS")>;
  349 def : InstRW<[M55Write2IntE2], (instregex "MVE_VCLZ")>;
  350 def : InstRW<[M55Write2IntE2], (instregex "MVE_V(D|I)?W?DUP")>;
  351 def : InstRW<[M55Write2IntE2], (instregex "MVE_VEOR")>;
  352 def : InstRW<[M55Write2IntE2], (instregex "MVE_VHADD")>;
  353 def : InstRW<[M55Write2IntE2], (instregex "MVE_VHCADD")>;
  354 def : InstRW<[M55Write2IntE2], (instregex "MVE_VHSUB")>;
  355 def : InstRW<[M55Write2IntE2], (instregex "MVE_V(MAX|MIN)A?(s|u)")>;
  356 def : InstRW<[M55Write2IntE3], (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>; // across-vector forms: latency 2/3/4 for 8/16/32
  357 def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>;
  358 def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
  359 def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VMOVN")>;
  360 def : InstRW<[M55Write2IntE2], (instregex "MVE_VMOVL")>;
  361 def : InstRW<[M55Write2IntE3], (instregex "MVE_VMULL[BT]p")>;
  362 def : InstRW<[M55Write2IntE2], (instregex "MVE_VMVN")>;
  363 def : InstRW<[M55Write2IntE2], (instregex "MVE_VNEG(u|s)")>;
  364 def : InstRW<[M55Write2IntE2], (instregex "MVE_VORN")>;
  365 def : InstRW<[M55Write2IntE2], (instregex "MVE_VORR")>;
  366 def : InstRW<[M55Write2IntE2], (instregex "MVE_VPSEL")>;
  367 def : InstRW<[M55Write2IntE2], (instregex "MQPRCopy")>;
  368 def : InstRW<[M55Write2IntE2], (instregex "MVE_VQABS")>;
  369 def : InstRW<[M55Write2IntE2], (instregex "MVE_VQADD")>;
  370 def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQMOV")>;
  371 def : InstRW<[M55Write2IntE2], (instregex "MVE_VQNEG")>;
  372 def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHL")>;
  373 def : InstRW<[M55Write2IntE3], (instregex "MVE_V[QR]SHL")>;
  374 def : InstRW<[M55Write2IntE3], (instregex "MVE_VQRSHL")>;
  375 def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQ?R?SHRU?N")>;
  376 def : InstRW<[M55Write2IntE2], (instregex "MVE_VSHR_")>;
  377 def : InstRW<[M55Write2IntE3], (instregex "MVE_VRSHR_")>;
  378 def : InstRW<[M55Write2IntE2], (instregex "MVE_VQSUB")>;
  379 def : InstRW<[M55Write2IntE2], (instregex "MVE_VREV")>;
  380 def : InstRW<[M55Write2IntE2], (instregex "MVE_VRHADD")>;
  381 def : InstRW<[M55Write2IntE3], (instregex "MVE_VSBC")>;
  382 def : InstRW<[M55Write2IntE2], (instregex "MVE_VSLI")>;
  383 def : InstRW<[M55Write2IntE2], (instregex "MVE_VSRI")>;
  384 def : InstRW<[M55Write2IntE2], (instregex "MVE_VSUB(_qr_)?i")>;
 385
  386 // FP/Mul pipe operations. All use M55Write2Float* writes (M55UnitVecFPALU, 2 cycles).
  387
  388 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABDf")>;
  389 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VABSf")>;
  390 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VADDf")>;
  391 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADD_qr_f")>;
  392 def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VADDLV")>; // second def uses M55WriteLat1
  393 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VADDV")>;
  394 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCADDf")>;
  395 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMLA")>;
  396 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCMUL")>;
  397 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMP(i|s|u)", "MVE_VPTv(4|8|16)(i|s|u)")>;
  398 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VCMPf", "MVE_VPTv(4|8)f")>;
  399 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf16(u|s)16")>;
  400 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32(u|s)32")>;
  401 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)16f16")>;
  402 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVT(u|s)32f32")>;
  403 def : InstRW<[M55Write2FloatE4NoFwd], (instregex "MVE_VCVTf16f32")>;
  404 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VCVTf32f16")>;
  405 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VFM(A|S)")>;
  406 def : InstRW<[M55Write2FloatE2], (instregex "MVE_V(MIN|MAX)NM")>;
  407 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_from_lane")>;
  408 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VMOV_rr_q")>;
  409 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMOVi")>;
  410 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VMUL(_qr_)?[if]")>;
  411 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?MULH")>;
  412 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?D?MULL[TB]?[su]")>;
  413 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQDMULL_qr_")>;
  414 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VQ?R?D?ML(A|S)[^L]")>;
  415 def : InstRW<[M55Write2FloatE3, M55WriteLat1], (instregex "MVE_VR?ML(A|S)L")>; // second def uses M55WriteLat1
  416 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VNEGf")>;
  417 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VRINTf")>;
  418 def : InstRW<[M55Write2FloatE2], (instregex "MVE_VSUBf")>;
  419 def : InstRW<[M55Write2FloatE3], (instregex "MVE_VSUB_qr_f")>;
 420
  421 // Some VMOV's can go down either pipeline (the M55UnitVecIntFP resource group).
  422 def : InstRW<[M55Write2IntFPE2], (instregex "MVE_VMOV_to_lane", "MVE_VMOV_q_rr")>;
  423
  424 def : InstRW<[M55WriteSysE2], (instregex "MVE_VCTP")>; // M55UnitVecSys, latency 1
  425 def : InstRW<[M55WriteSysE2], (instregex "MVE_VPNOT")>;
  426 def : InstRW<[M55WriteSysE2], (instregex "MVE_VPST")>;
 427
 428
  429 // VFP instructions. Each generic FP write is aliased to an FP-pipe write.
  430
  431 def : SchedAlias<WriteFPCVT, M55WriteFloatE3>;
  432 def : SchedAlias<WriteFPMOV, M55WriteFloatE3>;
  433 def : SchedAlias<WriteFPALU32, M55WriteFloatE3>;
  434 def : SchedAlias<WriteFPALU64, M55WriteFloatE3Plus13>; // latency 15
  435 def : SchedAlias<WriteFPMUL32, M55WriteFloatE3>;
  436 def : SchedAlias<WriteFPMUL64, M55WriteFloatE3Plus19>; // latency 21
  437 def : SchedAlias<WriteFPMAC32, M55WriteFloatE3Plus2>; // latency 4
  438 def : SchedAlias<WriteFPMAC64, M55WriteFloatE3Plus34>; // latency 36
  439 def : SchedAlias<WriteFPDIV32, M55WriteFloatE3Plus14>; // latency 16
  440 def : SchedAlias<WriteFPDIV64, M55WriteFloatE3Plus28>; // latency 30
  441 def : SchedAlias<WriteFPSQRT32, M55WriteFloatE3Plus14>; // latency 16
  442 def : SchedAlias<WriteFPSQRT64, M55WriteFloatE3Plus28>; // latency 30
  443 def : ReadAdvance<ReadFPMUL, 0>;
  444 def : ReadAdvance<ReadFPMAC, 0>;
 445
  446 def : InstRW<[M55WriteLSE3], (instregex "VLD")>; // load/store pipe, latency 2
  447 def : InstRW<[M55WriteLSE2], (instregex "VST")>; // load/store pipe, latency 1
  448 def : InstRW<[M55WriteLSE3], (instregex "VLLD", "VLST")>; // presumably VLLDM / VLSTM
 449
  450 def : InstRW<[M55WriteFloatE3], (instregex "VABS(H|S|D)")>;
  451 def : InstRW<[M55WriteFloatE3], (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S|D)")>;
  452 def : InstRW<[M55WriteFloatE3], (instregex "VCVT(B|T)(DH|HD)")>;
  453 def : InstRW<[M55WriteFloatE2], (instregex "VCMPZ?(E|H|S|D)")>;
  454 def : InstRW<[M55WriteFloatE3Plus7], (instregex "VDIVH")>; // latency 9
  455 def : InstRW<[M55WriteFloatE3], (instregex "VFN?M(A|S)(H|S)")>; // VFMA
  456 def : InstRW<[M55WriteFloatE3Plus22], (instregex "VFN?M(A|S)D")>; // VFMA (double), latency 24
  457 def : InstRW<[M55WriteFloatE3], (instregex "VFP_V(MAX|MIN)NM")>;
  458 def : InstRW<[M55WriteFloatE3], (instregex "VINSH$", "VMOVH$", "VMOVHR$", "VMOVSR$", "VMOVDRR$")>; // VINS, VMOVX, to-FP reg movs
  459 def : InstRW<[M55WriteFloatE2], (instregex "VMOVD$", "VMOVS$", "VMOVR")>; // Other VMOV's
  460 def : InstRW<[M55WriteFloatE2], (instregex "FCONSTH", "FCONSTS", "FCONSTD")>;
  461 def : InstRW<[M55WriteFloatE2], (instregex "VGETLNi32", "VSETLNi32")>;
  462 def : InstRW<[M55WriteFloatE2], (instregex "VMSR", "VMRS")>;
  463 def : InstRW<[M55WriteFloatE3Plus2], (instregex "VN?ML(A|S)H")>; // VMLA
  464 def : InstRW<[M55WriteFloatE3], (instregex "VNEG(H|S|D)")>;
  465 def : InstRW<[M55WriteFloatE3], (instregex "VRINT(A|M|N|P|R|X|Z)(H|S|D)")>;
  466 def : InstRW<[M55WriteFloatE3], (instregex "VSEL..(H|S|D)")>;
  467 def : InstRW<[M55WriteFloatE3Plus7], (instregex "VSQRTH")>; // latency 9
 468
  469 def : WriteRes<WriteVLD1, []>; // Empty definitions: no resource is listed. Presumably NEON-only writes (HasNEON is unsupported in this model); verify.
  470 def : WriteRes<WriteVLD2, []>;
  471 def : WriteRes<WriteVLD3, []>;
  472 def : WriteRes<WriteVLD4, []>;
  473 def : WriteRes<WriteVST1, []>;
  474 def : WriteRes<WriteVST2, []>;
  475 def : WriteRes<WriteVST3, []>;
  476 def : WriteRes<WriteVST4, []>;
 477
 478 }