contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td

   1 //=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file defines the machine model for Samsung Exynos-M1 to support
  11 // instruction scheduling and other instruction cost heuristics.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 //===----------------------------------------------------------------------===//
  16 // The Exynos-M1 is a traditional superscalar microprocessor with a
  17 // 4-wide in-order stage for decode and dispatch and a wider issue stage.
  18 // The execution units, loads and stores are out-of-order.
  19
     // Machine model record for the Exynos-M1. The `let SchedModel = ExynosM1Model`
     // scopes later in this file attach their definitions to this record.
  20 def ExynosM1Model : SchedMachineModel {
  21   let IssueWidth            =  4; // Up to 4 uops per cycle.
  22   let MicroOpBufferSize     = 96; // ROB size.
  23   let LoopMicroOpBufferSize = 24; // Based on the instruction queue size.
  24   let LoadLatency           =  4; // Optimistic load cases.
  25   let MispredictPenalty     = 14; // Minimum branch misprediction penalty.
  26   let CompleteModel         =  0; // Use the default model otherwise.
  27 }
  28
  29 //===----------------------------------------------------------------------===//
  30 // Define each kind of processor resource and number available on the Exynos-M1,
  31 // which has 9 pipelines, each with its own queue with out-of-order dispatch.
     // The ProcResource<N> argument is the number of units of that kind.
     // Presumably the 9 pipelines are 2 A + 1 C + 2 B + L + S + F0 + F1, with D
     // living inside C as its comment says -- TODO confirm.
     // NOTE(review): unlike the resource groups that follow, these records are
     // declared outside any `let SchedModel = ExynosM1Model` scope -- confirm
     // that this is intended.
  32
  33 def M1UnitA  : ProcResource<2>; // Simple integer
  34 def M1UnitC  : ProcResource<1>; // Simple and complex integer
  35 def M1UnitD  : ProcResource<1>; // Integer division (inside C, serialized)
  36 def M1UnitB  : ProcResource<2>; // Branch
  37 def M1UnitL  : ProcResource<1>; // Load
  38 def M1UnitS  : ProcResource<1>; // Store
  39 def M1PipeF0 : ProcResource<1>; // FP #0
     // Units declared with Super = M1PipeF0 are sub-resources of FP pipe #0.
  40 let Super = M1PipeF0 in {
  41   def M1UnitFMAC   : ProcResource<1>; // FP multiplication
  42   def M1UnitNAL0   : ProcResource<1>; // Simple vector
  43   def M1UnitNMISC  : ProcResource<1>; // Miscellanea
  44   def M1UnitFCVT   : ProcResource<1>; // FP conversion
  45   def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
  46 }
  47 def M1PipeF1 : ProcResource<1>; // FP #1
     // Units declared with Super = M1PipeF1 are sub-resources of FP pipe #1.
  48 let Super = M1PipeF1 in {
  49   def M1UnitFADD : ProcResource<1>; // Simple FP
  50   def M1UnitNAL1 : ProcResource<1>; // Simple vector
  51   def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
  52   def M1UnitFST  : ProcResource<1>; // FP store
  53 }
  54
  55 let SchedModel = ExynosM1Model in {
       // Resource groups: each record gathers the listed units under one name.
       // M1UnitALU spans both integer unit kinds (A and C); M1UnitNALU spans the
       // simple vector unit of each FP pipe (NAL0 on F0, NAL1 on F1).
  56   def M1UnitALU  : ProcResGroup<[M1UnitA,
  57                                  M1UnitC]>;    // All integer
  58   def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
  59                                  M1UnitNAL1]>; // All simple vector
  60 }
  61
  62 let SchedModel = ExynosM1Model in {
  63
  64 //===----------------------------------------------------------------------===//
  65 // Coarse scheduling model for the Exynos-M1.
     // Helper writes are named M1Write<unit><latency>: A = any integer unit
     // (M1UnitALU), C = the complex integer unit, B = a branch unit.
  66
  67 def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
  68 def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
  69 def M1WriteC1 : SchedWriteRes<[M1UnitC]>   { let Latency = 1; }
  70 def M1WriteC2 : SchedWriteRes<[M1UnitC]>   { let Latency = 2; }
  71
  72 def M1WriteB1 : SchedWriteRes<[M1UnitB]>      { let Latency = 1; }
  73
  74 def M1WriteL5 : SchedWriteRes<[M1UnitL]>   { let Latency = 5; }
     // Load with a register-offset address: when ScaledIdxPred holds, the load
     // (M1WriteL5) is followed by an extra ALU write (M1WriteA1); otherwise it
     // is the load alone. Aliased to WriteLDIdx further down in this file.
  75 def M1WriteLA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
  76                                                             M1WriteA1]>,
  77                                    SchedVar<NoSchedPred,   [M1WriteL5]>]>;
  78
  79 def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
  80 def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; }
  81 def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
     // Store with a register-offset address: when ScaledIdxPred holds, a 2-cycle
     // store write (M1WriteS2) plus an ALU write (M1WriteA1); otherwise the
     // 1-cycle store write alone. Aliased to WriteSTIdx further down in this file.
  82 def M1WriteSA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
  83                                                             M1WriteA1]>,
  84                                    SchedVar<NoSchedPred,   [M1WriteS1]>]>;
  85
  86 def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
  87                                       SchedVar<NoSchedPred,   [ReadDefault]>]>;
     // Both variants above resolve to ReadDefault, so the predicate currently
     // makes no difference to the read that is selected.
  88 def : SchedAlias<ReadAdrBase, M1ReadAdrBase>; // Override the generic read.
  89
  90 // Branch instructions.
  91 // NOTE: Unconditional direct branches actually take neither cycles nor units.
     // WriteBr is charged to a branch unit; WriteBrReg is charged to the complex
     // integer unit (M1UnitC) instead.
  92 def : WriteRes<WriteBr,    [M1UnitB]> { let Latency = 1; }
  93 def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
  94
  95 // Arithmetic and logical integer instructions.
     // All of these use any integer unit (M1UnitALU) with a 1-cycle latency.
  96 def : WriteRes<WriteI,     [M1UnitALU]> { let Latency = 1; }
  97 // TODO: Shifts over 3 and some extensions take 2 cycles.
  98 def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
  99 def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
 100 def : WriteRes<WriteIS,    [M1UnitALU]> { let Latency = 1; }
 101
 102 // Move instructions.
 103 def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
 104
 105 // Divide and multiply instructions.
     // Divisions list two resources, M1UnitC and M1UnitD. ResourceCycles is
     // given in the same order: 1 cycle for M1UnitC and the full latency for the
     // serialized divider M1UnitD.
 106 def : WriteRes<WriteID32, [M1UnitC,
 107                            M1UnitD]> { let Latency = 13;
 108                                        let ResourceCycles = [1, 13]; }
 109 def : WriteRes<WriteID64, [M1UnitC,
 110                            M1UnitD]> { let Latency = 21;
 111                                        let ResourceCycles = [1, 21]; }
 112 // TODO: Long multiplication takes 5 cycles and also the ALU.
 113 // TODO: Multiplication with accumulation can be advanced.
 114 def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
 115 // TODO: 64-bit multiplication has a throughput of 1/2.
 116 def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
 117
 118 // Miscellaneous instructions.
     // WriteExtr lists M1UnitALU twice, i.e. two integer-unit uses, with a
     // 2-cycle latency.
 119 def : WriteRes<WriteExtr, [M1UnitALU,
 120                            M1UnitALU]> { let Latency = 2; }
 121
 122 // TODO: The latency for the post or pre register is 1 cycle.
     // For now the address write-back is modeled with no resources and zero
     // latency.
 123 def : WriteRes<WriteAdr, []> { let Latency = 0; }
 124
 125 // Load instructions.
     // Plain loads use the load unit with a 4-cycle latency. WriteLDHi is
     // charged to an integer unit rather than the load unit. Indexed loads are
     // redirected to the M1WriteLA variant defined earlier in this file.
 126 def : WriteRes<WriteLD,    [M1UnitL]>   { let Latency = 4; }
 127 def : WriteRes<WriteLDHi,  [M1UnitALU]> { let Latency = 4; }
 128 def : SchedAlias<WriteLDIdx, M1WriteLA>;
 129
 130 // Store instructions.
     // All plain store writes use the store unit with a 1-cycle latency.
     // Indexed stores are redirected to the M1WriteSA variant defined earlier.
 131 def : WriteRes<WriteST,    [M1UnitS]> { let Latency = 1; }
 132 def : WriteRes<WriteSTP,   [M1UnitS]> { let Latency = 1; }
 133 def : WriteRes<WriteSTX,   [M1UnitS]> { let Latency = 1; }
 134 def : SchedAlias<WriteSTIdx, M1WriteSA>;
 135
 136 // FP data instructions.
     // Coarse defaults; the InstRW records in the finer model later in this file
     // give specific instructions their own writes.
 137 def : WriteRes<WriteF,    [M1UnitFADD]>  { let Latency = 3; }
 138 // TODO: FCCMP is much different.
 139 def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
     // The divide/square-root unit is held for all 15 cycles of the latency.
 140 def : WriteRes<WriteFDiv, [M1UnitFVAR]>  { let Latency = 15;
 141                                            let ResourceCycles = [15]; }
 142 def : WriteRes<WriteFMul, [M1UnitFMAC]>  { let Latency = 4; }
 143
 144 // FP miscellaneous instructions.
 145 // TODO: Conversion between register files is much different.
 146 def : WriteRes<WriteFCvt,  [M1UnitFCVT]> { let Latency = 3; }
 147 def : WriteRes<WriteFImm,  [M1UnitNALU]> { let Latency = 1; }
     // WriteFCopy is charged to the store unit with a 4-cycle latency.
 148 def : WriteRes<WriteFCopy, [M1UnitS]>    { let Latency = 4; }
 149
 150 // FP load instructions.
 151 // TODO: ASIMD loads are much different.
     // Load unit, 5-cycle latency (one more than the integer WriteLD above).
 152 def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
 153
 154 // FP store instructions.
 155 // TODO: ASIMD stores are much different.
     // Uses both the store unit and the FP store unit.
 156 def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
 157
 158 // ASIMD FP instructions.
     // Generic vector write: simple FP unit with a 3-cycle latency.
 159 def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
 160
 161 // Other miscellaneous instructions.
     // None of these consume a resource. WriteAtomic is flagged Unsupported for
     // this model; the other three get a nominal 1-cycle latency.
 162 def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
 163 def : WriteRes<WriteBarrier, []> { let Latency = 1; }
 164 def : WriteRes<WriteHint,    []> { let Latency = 1; }
 165 def : WriteRes<WriteSys,     []> { let Latency = 1; }
 166
 167 //===----------------------------------------------------------------------===//
 168 // Generic fast forwarding.
     // Every read below gets an advance of 0 cycles (no forwarding) except the
     // multiply-accumulate operand read, ReadIMA.
 169
 170 // TODO: Add FP register forwarding rules.
 171
 172 def : ReadAdvance<ReadI,       0>;
 173 def : ReadAdvance<ReadISReg,   0>;
 174 def : ReadAdvance<ReadIEReg,   0>;
 175 def : ReadAdvance<ReadIM,      0>;
 176 // Integer multiply-accumulate.
 177 // TODO: The forwarding for WriteIM64 actually saves 3 cycles.
     // The 2-cycle advance applies only to values produced by the listed writes
     // (WriteIM32 and WriteIM64).
 178 def : ReadAdvance<ReadIMA,     2, [WriteIM32, WriteIM64]>;
 179 def : ReadAdvance<ReadID,      0>;
 180 def : ReadAdvance<ReadExtrHi,  0>;
 181 def : ReadAdvance<ReadAdrBase, 0>;
 182 def : ReadAdvance<ReadVLD,     0>;
 183
 184 //===----------------------------------------------------------------------===//
 185 // Finer scheduling model for the Exynos-M1.
     // Named writes referenced by the InstRW records that follow. The
     // single-unit writes are named M1Write<unit><latency>; the M1WriteNEON<x>
     // writes each combine several units under one overall latency.
 186
 187 def M1WriteNEONA   : SchedWriteRes<[M1UnitNALU,
 188                                     M1UnitNALU,
 189                                     M1UnitFADD]>   { let Latency = 9; }
 190 def M1WriteNEONB   : SchedWriteRes<[M1UnitNALU,
 191                                     M1UnitFST]>    { let Latency = 5; }
 192 def M1WriteNEONC   : SchedWriteRes<[M1UnitNALU,
 193                                     M1UnitFST]>    { let Latency = 6; }
 194 def M1WriteNEOND   : SchedWriteRes<[M1UnitNALU,
 195                                     M1UnitFST,
 196                                     M1UnitL]>      { let Latency = 10; }
 197 def M1WriteNEONE   : SchedWriteRes<[M1UnitFCVT,
 198                                     M1UnitFST]>    { let Latency = 8; }
 199 def M1WriteNEONF   : SchedWriteRes<[M1UnitFCVT,
 200                                     M1UnitFST,
 201                                     M1UnitL]>      { let Latency = 13; }
 202 def M1WriteNEONG   : SchedWriteRes<[M1UnitNMISC,
 203                                     M1UnitFST]>    { let Latency = 6; }
 204 def M1WriteNEONH   : SchedWriteRes<[M1UnitNALU,
 205                                     M1UnitFST]>    { let Latency = 3; }
 206 def M1WriteNEONI   : SchedWriteRes<[M1UnitFST,
 207                                     M1UnitL]>      { let Latency = 9; }
 208 def M1WriteNEONJ   : SchedWriteRes<[M1UnitNMISC,
 209                                     M1UnitFMAC]>   { let Latency = 6; }
 210 def M1WriteNEONK   : SchedWriteRes<[M1UnitNMISC,
 211                                     M1UnitFMAC]>   { let Latency = 7; }
 212 def M1WriteFADD3   : SchedWriteRes<[M1UnitFADD]>   { let Latency = 3; }
 213 def M1WriteFCVT3   : SchedWriteRes<[M1UnitFCVT]>   { let Latency = 3; }
 214 def M1WriteFCVT4   : SchedWriteRes<[M1UnitFCVT]>   { let Latency = 4; }
 215 def M1WriteFMAC4   : SchedWriteRes<[M1UnitFMAC]>   { let Latency = 4; }
 216 def M1WriteFMAC5   : SchedWriteRes<[M1UnitFMAC]>   { let Latency = 5; }
     // The two FVAR writes hold the serialized divide/square-root unit for
     // their whole latency.
 217 def M1WriteFVAR15  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 15;
 218                                                      let ResourceCycles = [15]; }
 219 def M1WriteFVAR23  : SchedWriteRes<[M1UnitFVAR]>   { let Latency = 23;
 220                                                      let ResourceCycles = [23]; }
 221 def M1WriteNALU1   : SchedWriteRes<[M1UnitNALU]>   { let Latency = 1; }
 222 def M1WriteNALU2   : SchedWriteRes<[M1UnitNALU]>   { let Latency = 2; }
     // NAL1<n>: simple vector unit #1 only, latency n.
 223 def M1WriteNAL11   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 1; }
 224 def M1WriteNAL12   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 2; }
 225 def M1WriteNAL13   : SchedWriteRes<[M1UnitNAL1]>   { let Latency = 3; }
 226 def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
 227 def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
 228 def M1WriteNMISC1  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 1; }
 229 def M1WriteNMISC2  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 2; }
 230 def M1WriteNMISC3  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 3; }
 231 def M1WriteNMISC4  : SchedWriteRes<[M1UnitNMISC]>  { let Latency = 4; }
     // NOTE(review): M1WriteTB is not referenced by any InstRW in this file as
     // shown; the TBZ/TBNZ record uses [M1WriteC1, M1WriteA2] instead.
 232 def M1WriteTB      : SchedWriteRes<[M1UnitC,
 233                                     M1UnitALU]>    { let Latency = 2; }
 234
 235 // Branch instructions.
     // Per-instruction overrides: each InstRW binds the listed writes to the
     // named instructions (instrs) or to opcode names matching a regular
     // expression (instregex).
 236 def : InstRW<[M1WriteB1], (instrs Bcc)>;
 237 // NOTE: Conditional branch and link adds a B uop.
     // NOTE(review): the record below maps BL to an ALU write (M1WriteA1) only,
     // while the note above speaks of a B uop -- confirm which is intended.
 238 def : InstRW<[M1WriteA1], (instrs BL)>;
 239 // NOTE: Indirect branch and link with LR adds an ALU uop.
 240 def : InstRW<[M1WriteA1,
 241               M1WriteC1], (instrs BLR)>;
 242 def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
 243 def : InstRW<[M1WriteC1,
 244               M1WriteA2], (instregex "^TBN?Z[WX]")>;
 245
 246 // Arithmetic and logical integer instructions.
     // Register copies are costed as a 1-cycle write on any integer unit.
 247 def : InstRW<[M1WriteA1], (instrs COPY)>;
 248
 249 // Divide and multiply instructions.
 250
 251 // Miscellaneous instructions.
 252
 253 // Load instructions.
 254
 255 // Store instructions.
 256
 257 // FP data instructions.
     // Scalar FP overrides. The [DS] classes select the double and single
     // precision opcode names.
 258 def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)[DS]r")>;
 259 def : InstRW<[M1WriteFADD3],  (instregex "^F(ADD|SUB)[DS]rr")>;
 260 def : InstRW<[M1WriteNEONG],  (instregex "^FCCMPE?[DS]rr")>;
 261 def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
     // Single precision divide/square root use the 15-cycle write, double
     // precision the 23-cycle one.
 262 def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
 263 def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
 264 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
 265 def : InstRW<[M1WriteFMAC4],  (instregex "^FN?MUL[DS]rr")>;
 266 def : InstRW<[M1WriteFMAC5],  (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
 267 def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT.+r")>;
 268 def : InstRW<[M1WriteNEONH],  (instregex "^FCSEL[DS]rrr")>;
 269 def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
 270 def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
 271
 272 // FP miscellaneous instructions.
     // Conversions and moves; several of the patterns below are mapped to the
     // multi-unit M1WriteNEON<x> writes.
 273 def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
 274 def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
 275 def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
 276 def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
 277 def : InstRW<[M1WriteS4],    (instregex "^FMOV[WX][DS](High)?r")>;
 278 def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
 279
 280 // FP load instructions.
 281
 282 // FP store instructions.
 283
 284 // ASIMD instructions.
     // Integer vector overrides. Each pattern is matched against opcode names;
     // the trailing `v` begins the vector arrangement suffix of the name.
 285 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
 286 def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
 287 def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
 288 def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
 289 def : InstRW<[M1WriteNALU1],  (instregex "^(ADD|NEG|SUB)v")>;
 290 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
 291 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
 292 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
 293 def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
 294 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
 295 def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
 296 def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
 297 def : InstRW<[M1WriteNALU1],  (instregex "^CMTSTv")>;
 298 def : InstRW<[M1WriteNALU1],  (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
     // Element-wise, pairwise (P) and across-vector (V) min/max get latencies
     // of 1, 2 and 3 cycles respectively.
 299 def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
 300 def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
 301 def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
     // Multiplies and multiply-accumulates: 4 cycles on the miscellanea unit.
 302 def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
 303 def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
 304 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
 305 def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
     // Shifts: plain shifts and inserts take 1 cycle on either simple vector
     // unit; accumulating, narrowing and saturating forms take 3 cycles on
     // M1UnitNAL1.
 306 def : InstRW<[M1WriteNAL13],  (instregex "^(S|SR|U|UR)SRAv")>;
 307 def : InstRW<[M1WriteNALU1],  (instregex "^[SU]?SH(L|LL|R)2?v")>;
 308 def : InstRW<[M1WriteNALU1],  (instregex "^S[LR]Iv")>;
 309 def : InstRW<[M1WriteNAL13],  (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
 310 def : InstRW<[M1WriteNAL13],  (instregex "^[SU](Q|QR|R)SHLU?v")>;
 311
 312 // ASIMD FP instructions.
     // Vector FP overrides, matched against opcode names.
 313 def : InstRW<[M1WriteNALU1],  (instregex "^F(ABS|NEG)v")>;
 314 def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
 315 def : InstRW<[M1WriteNEONA],  (instregex "^FADDP")>;
 316 def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
 317 def : InstRW<[M1WriteFCVT3],  (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
     // NOTE(review): the four divide/square-root patterns below omit the
     // leading `^` that every other pattern in this file carries -- confirm
     // whether the anchor was left out on purpose.
 318 def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
 319 def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
 320 def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
 321 def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
 322 def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
 323 def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
     // For multiplies, opcode names matching `v.i` get a write that adds the
     // miscellanea unit and one extra cycle compared with those matching `v.f`.
 324 def : InstRW<[M1WriteNEONJ],  (instregex "^FMULX?v.i")>;
 325 def : InstRW<[M1WriteFMAC4],  (instregex "^FMULX?v.f")>;
 326 def : InstRW<[M1WriteNEONK],  (instregex "^FML[AS]v.i")>;
 327 def : InstRW<[M1WriteFMAC5],  (instregex "^FML[AS]v.f")>;
 328 def : InstRW<[M1WriteFCVT3],  (instregex "^FRINT[AIMNPXZ]v")>;
 329
 330 // ASIMD miscellaneous instructions.
     // Permutes, moves, estimates and table lookups, matched against opcode
     // names.
 331 def : InstRW<[M1WriteNALU1],  (instregex "^RBITv")>;
 332 def : InstRW<[M1WriteNAL11],  (instregex "^(BIF|BIT|BSL)v")>;
 333 def : InstRW<[M1WriteNALU1],  (instregex "^CPY")>;
 334 def : InstRW<[M1WriteNEONB],  (instregex "^DUPv.+gpr")>;
 335 def : InstRW<[M1WriteNALU1],  (instregex "^DUPv.+lane")>;
 336 def : InstRW<[M1WriteNAL13],  (instregex "^[SU]?Q?XTU?Nv")>;
 337 def : InstRW<[M1WriteNEONC],  (instregex "^INSv.+gpr")>;
 338 def : InstRW<[M1WriteFCVT4],  (instregex "^[FU](RECP|RSQRT)Ev")>;
 339 def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
 340 def : InstRW<[M1WriteFMAC5],  (instregex "^F(RECP|RSQRT)Sv")>;
 341 def : InstRW<[M1WriteNALU1],  (instregex "^REV(16|32|64)v")>;
     // Table lookups: the One/Two/Three/Four opcode variants repeat the base
     // write 1 to 4 times through WriteSequence. The base write is 1 cycle for
     // the v8i8 forms and 2 cycles for the v16i8 forms, both on M1UnitNAL1.
 342 def : InstRW<[M1WriteNAL11],  (instregex "^TB[LX]v8i8One")>;
 343 def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
 344                               (instregex "^TB[LX]v8i8Two")>;
 345 def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
 346                               (instregex "^TB[LX]v8i8Three")>;
 347 def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
 348                               (instregex "^TB[LX]v8i8Four")>;
 349 def : InstRW<[M1WriteNAL12],  (instregex "^TB[LX]v16i8One")>;
 350 def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
 351                               (instregex "^TB[LX]v16i8Two")>;
 352 def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
 353                               (instregex "^TB[LX]v16i8Three")>;
 354 def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
 355                               (instregex "^TB[LX]v16i8Four")>;
 356 def : InstRW<[M1WriteNEOND],  (instregex "^[SU]MOVv")>;
 357 def : InstRW<[M1WriteNALU1],  (instregex "^INSv.+lane")>;
     // TRN/UZP: the arrangements in the first pattern take 1 cycle, those in
     // the second pattern take 2 cycles.
 358 def : InstRW<[M1WriteNALU1],  (instregex "^(TRN|UZP)[12](v8i8|v4i16|v2i32)")>;
 359 def : InstRW<[M1WriteNALU2],  (instregex "^(TRN|UZP)[12](v16i8|v8i16|v4i32|v2i64)")>;
 360 def : InstRW<[M1WriteNALU1],  (instregex "^ZIP[12]v")>;
 361
 362 // ASIMD load instructions.
 363
 364 // ASIMD store instructions.
 365
 366 // Cryptography instructions.
     // M1WriteAES is a 1-cycle write on the cryptographic unit. M1ReadAES
     // advances, by 1 cycle, a read whose value was produced by M1WriteAES, and
     // it is attached to the AESMC/AESIMC operand read below.
 367 def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
 368 def M1ReadAES  : SchedReadAdvance<1, [M1WriteAES]>;
 369 def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
 370 def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;
 371
     // The remaining crypto patterns use the 1-cycle or 5-cycle cryptographic
     // unit writes.
 372 def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
 373 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
 374 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
 375 def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
 376 def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
 377
 378 // CRC instructions.
     // CRC32 opcodes: 2 cycles on the complex integer unit.
 379 def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
 380
 381 } // SchedModel = ExynosM1Model