1 //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the machine model for Znver1 to support instruction
11 // scheduling and other instruction cost heuristics.
13 //===----------------------------------------------------------------------===//
15 def Znver1Model : SchedMachineModel {
16 // Zen can decode 4 instructions per cycle.
18 // Based on the reorder buffer we define MicroOpBufferSize
19 let MicroOpBufferSize = 192;
21 let MispredictPenalty = 17;
23 let PostRAScheduler = 1;
25 // FIXME: CompleteModel must be cleared while this model is incomplete.
26 // Not every instruction has scheduling information yet,
27 // so we reset the value of this variable to
28 // indicate that the model is incomplete.
29 let CompleteModel = 0;
32 let SchedModel = Znver1Model in {
34 // Zen can issue micro-ops to 10 different units in one cycle.
36 // * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3)
37 // * Two AGU units (ZAGU0, ZAGU1)
38 // * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3)
39 // AGUs feed the load/store queues at two loads and one store per cycle.
41 // Four integer ALU pipes, each declared as its own processor resource.
42 def ZnALU0 : ProcResource<1>;
43 def ZnALU1 : ProcResource<1>;
44 def ZnALU2 : ProcResource<1>;
45 def ZnALU3 : ProcResource<1>;
47 // Two address-generation (AGU) pipes.
48 def ZnAGU0 : ProcResource<1>;
49 def ZnAGU1 : ProcResource<1>;
51 // Four floating-point (FPU) pipes.
52 def ZnFPU0 : ProcResource<1>;
53 def ZnFPU1 : ProcResource<1>;
54 def ZnFPU2 : ProcResource<1>;
55 def ZnFPU3 : ProcResource<1>;
// FPU pipe groups. The digits in each group name list the member pipes
// (e.g. ZnFPU013 = pipes 0, 1 and 3); ZnFPU with no suffix contains all four.
58 def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
59 def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
60 def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
61 def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
62 def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
63 def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
64 def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
65 def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
67 // The groupings of the units are defined below.
68 // Micro-ops that may be issued to more than one unit are handled this way.
71 // ZnALU03 - 0,3 grouping
72 def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
74 // 56 Entry (14x4 entries) Int Scheduler
75 def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
79 // 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
80 // but are relevant for some instructions
81 def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
85 // Integer Multiplication issued on ALU1.
86 def ZnMultiplier : ProcResource<1>;
88 // Integer division issued on ALU2.
89 def ZnDivider : ProcResource<1>;
91 // A 4-cycle load-to-use latency is captured here.
92 def : ReadAdvance<ReadAfterLd, 4>;
94 // (a folded load is an instruction that loads and does some operation)
95 // Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
96 // Instructions with folded loads are usually micro-fused, so they only appear
100 // This multiclass is for folded loads for integer units.
101 multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
102 ProcResourceKind ExePort,
104 // Register variant takes 1-cycle on Execution Port.
105 def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
107 // Memory variant also uses a cycle on ZnAGU
108 // adds 4 cycles to the latency.
109 def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
111 let Latency = !add(Lat, 4);
115 // This multiclass is for folded loads for floating point units.
116 multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
117 ProcResourceKind ExePort,
119 // Register variant takes 1-cycle on Execution Port.
120 def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
122 // Memory variant also uses a cycle on ZnAGU
123 // adds 7 cycles to the latency.
124 def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
125 let Latency = !add(Lat, 7);
129 // WriteRMW is set for instructions with Memory write
130 // operation in codegen
131 def : WriteRes<WriteRMW, [ZnAGU]>;
// Stores and loads occupy the AGU group; loads are given a latency of 8.
// Register moves and LEA occupy the integer ALU group.
133 def : WriteRes<WriteStore, [ZnAGU]>;
134 def : WriteRes<WriteMove, [ZnALU]>;
135 def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
// WriteZero is bound to an empty resource list, i.e. it consumes no
// execution resource in this model.
137 def : WriteRes<WriteZero, []>;
138 def : WriteRes<WriteLEA, [ZnALU]>;
// Simple integer ALU, shift and jump operations: latency 1 on the ZnALU
// group for the register form. ZnWriteResPair also defines the folded-load
// form, which additionally uses ZnAGU.
139 defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
140 defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
141 defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
143 // Treat misc copies as a move.
144 def : InstRW<[WriteMove], (instrs COPY)>;
147 def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> {
149 let ResourceCycles = [1, 41];
152 def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
154 let ResourceCycles = [1, 4, 41];
158 def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
161 def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
165 def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> {
169 // Floating point operations
// Each entry is <write class, execution resource, register-form latency>.
// ZnWriteResFpuPair also defines the folded-load variant, which additionally
// uses ZnAGU and adds 7 cycles to the latency.
170 defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
171 defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
172 defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
173 defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
174 defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
175 defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
176 defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
177 defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
178 defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
179 defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
180 defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
181 defm : ZnWriteResFpuPair<WriteFMA, ZnFPU03, 5>;
182 defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
183 defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
184 defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
186 // Vector integer operations that use the FPU units
187 defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
188 defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
189 defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
190 defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
191 defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
192 defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
193 defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
194 defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
196 // Vector Shift Operations
197 defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
// AES write classes: latency 4 on FPU pipes 0/1.
200 defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
201 defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
202 defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
204 def : WriteRes<WriteFence, [ZnAGU]>;
205 def : WriteRes<WriteNop, []>;
207 // The following instructions, modeled with latency=100, are microcoded.
208 // We set a long latency so as to block the entire pipeline.
209 defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
211 //Microcoded Instructions
212 let Latency = 100 in {
213 def : WriteRes<WriteMicrocoded, []>;
214 def : WriteRes<WriteSystem, []>;
215 def : WriteRes<WriteMPSAD, []>;
216 def : WriteRes<WriteMPSADLd, []>;
217 def : WriteRes<WriteCLMul, []>;
218 def : WriteRes<WriteCLMulLd, []>;
219 def : WriteRes<WritePCmpIStrM, []>;
220 def : WriteRes<WritePCmpIStrMLd, []>;
221 def : WriteRes<WritePCmpEStrI, []>;
222 def : WriteRes<WritePCmpEStrILd, []>;
223 def : WriteRes<WritePCmpEStrM, []>;
224 def : WriteRes<WritePCmpEStrMLd, []>;
225 def : WriteRes<WritePCmpIStrI, []>;
226 def : WriteRes<WritePCmpIStrILd, []>;
229 //=== Regex based itineraries ===//
234 // - mm: 64 bit mmx register.
235 // - x = 128 bit xmm register.
236 // - (x)mm = mmx or xmm register.
237 // - y = 256 bit ymm register.
238 // - v = any vector register.
240 //=== Integer Instructions ===//
241 //-- Move instructions --//
244 def : InstRW<[WriteALULd, ReadAfterLd], (instregex "MOV16rm")>;
248 def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
252 def : InstRW<[WriteALU],
253 (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>;
255 def : InstRW<[WriteALULd, ReadAfterLd],
256 (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>;
260 def ZnWriteXCHG : SchedWriteRes<[ZnALU]> {
262 let ResourceCycles = [2];
265 def : InstRW<[ZnWriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>;
268 def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
272 def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
274 def : InstRW<[WriteMicrocoded], (instregex "XLAT")>;
278 def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
282 def : InstRW<[ZnWritePop16r], (instregex "POP16rmm")>;
283 def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
284 def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
288 // r. Has default values.
290 def ZnWritePUSH : SchedWriteRes<[ZnAGU]>{
293 def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;
296 def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
299 def ZnWritePushA : SchedWriteRes<[ZnAGU]> {
302 def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
305 def : InstRW<[WriteMicrocoded], (instregex "LAHF")>;
308 def ZnWriteSAHF : SchedWriteRes<[ZnALU]> {
312 def : InstRW<[ZnWriteSAHF], (instregex "SAHF")>;
315 def ZnWriteBSwap : SchedWriteRes<[ZnALU]> {
316 let ResourceCycles = [4];
318 def : InstRW<[ZnWriteBSwap], (instregex "BSWAP")>;
322 def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> {
325 def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
328 def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
330 //-- Arithmetic instructions --//
334 def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
335 "(ADD|SUB)(8|16|32|64)mi8",
340 def : InstRW<[WriteALU], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)",
341 "(ADC|SBB)(16|32|64)ri8",
343 "(ADC|SBB)(8|16|32|64)rr_REV")>;
346 def : InstRW<[WriteALULd, ReadAfterLd],
347 (instregex "(ADC|SBB)(8|16|32|64)rm")>;
350 def : InstRW<[WriteALULd],
351 (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
352 "(ADC|SBB)(16|32|64)mi8",
357 def : InstRW<[WriteALULd],
358 (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m",
359 "(INC|DEC)64(16|32)m")>;
363 def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
366 def : InstRW<[ZnWriteMul16], (instregex "IMUL16r", "MUL16r")>;
369 def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
372 def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instregex "IMUL16m", "MUL16m")>;
375 def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
378 def : InstRW<[ZnWriteMul32], (instregex "IMUL32r", "MUL32r")>;
381 def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
384 def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instregex "IMUL32m", "MUL32m")>;
387 def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
391 def : InstRW<[ZnWriteMul64], (instregex "IMUL64r", "MUL64r")>;
394 def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
398 def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instregex "IMUL64m", "MUL64m")>;
401 def ZnWriteMul16rri : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
404 def : InstRW<[ZnWriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>;
407 def ZnWriteMul16rmi : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
410 def : InstRW<[ZnWriteMul16rmi, ReadAfterLd], (instregex "IMUL16rmi", "IMUL16rmi8")>;
414 def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
416 let ResourceCycles = [1, 2];
418 def : InstRW<[ZnWriteMulX32], (instregex "MULX32rr")>;
421 def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
423 let ResourceCycles = [1, 2, 2];
425 def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instregex "MULX32rm")>;
428 def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
431 def : InstRW<[ZnWriteMulX64], (instregex "MULX64rr")>;
434 def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
437 def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instregex "MULX64rm")>;
441 def ZnWriteDiv8 : SchedWriteRes<[ZnALU2, ZnDivider]> {
444 def : InstRW<[ZnWriteDiv8], (instregex "DIV8r", "IDIV8r")>;
447 def ZnWriteDiv16 : SchedWriteRes<[ZnALU2, ZnDivider]> {
451 def : InstRW<[ZnWriteDiv16], (instregex "DIV16r", "IDIV16r")>;
454 def ZnWriteDiv32 : SchedWriteRes<[ZnALU2, ZnDivider]> {
458 def : InstRW<[ZnWriteDiv32], (instregex "DIV32r", "IDIV32r")>;
461 def ZnWriteDiv64 : SchedWriteRes<[ZnALU2, ZnDivider]> {
465 def : InstRW<[ZnWriteDiv64], (instregex "DIV64r", "IDIV64r")>;
467 //-- Control transfer instructions --//
470 def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
471 def : InstRW<[ZnWriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>;
474 def : InstRW<[WriteMicrocoded], (instregex "INTO")>;
477 def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
478 def : InstRW<[ZnWriteLOOP], (instregex "LOOP")>;
480 // LOOP(N)E, LOOP(N)Z
481 def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
482 def : InstRW<[ZnWriteLOOPE], (instregex "LOOPE", "LOOPNE",
487 def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
488 def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;
490 def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
493 def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
496 def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
497 "IRET(D|Q)", "RETF")>;
499 //-- Logic instructions --//
503 def : InstRW<[WriteALULd],
504 (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
505 "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
509 def : InstRW<[WriteALU], (instregex "ANDN(32|64)rr")>;
511 def : InstRW<[WriteALULd, ReadAfterLd], (instregex "ANDN(32|64)rm")>;
513 // Define ALU latency variants
514 def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> {
517 def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
521 def ZnWriteALULat3 : SchedWriteRes<[ZnALU]> {
524 def ZnWriteALULat3Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
530 def : InstRW<[ZnWriteALULat3], (instregex "BS(R|F)(16|32|64)rr")>;
532 def : InstRW<[ZnWriteALULat3Ld, ReadAfterLd], (instregex "BS(R|F)(16|32|64)rm")>;
536 def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>;
538 def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mr")>;
539 def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>;
543 def ZnWriteBTRSC : SchedWriteRes<[ZnALU]> {
547 def : InstRW<[ZnWriteBTRSC], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>;
551 def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
556 def : InstRW<[ZnWriteBTRSCm], (instregex "BT(R|S|C)(16|32|64)m(r|i8)")>;
560 def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>;
562 def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "BLS(I|MSK|R)(32|64)rm")>;
566 def : InstRW<[WriteALU], (instregex "BEXTR(32|64)rr")>;
568 def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BEXTR(32|64)rm")>;
572 def : InstRW<[WriteALU], (instregex "BZHI(32|64)rr")>;
574 def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BZHI(32|64)rm")>;
577 def : InstRW<[WriteALU], (instregex "STD", "CLD")>;
581 def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
583 def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
586 def : InstRW<[WriteShift], (instregex "RO(R|L)(8|16|32|64)r1")>;
590 def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r1")>;
593 def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m1")>;
596 def : InstRW<[WriteShift], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>;
599 def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>;
603 def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
607 def : InstRW<[WriteShift], (instregex "SH(R|L)D(16|32|64)rri8")>;
610 def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
613 def : InstRW<[WriteMicrocoded], (instregex "SHLD(16|32|64)rrCL")>;
616 def : InstRW<[WriteMicrocoded], (instregex "SHRD(16|32|64)rrCL")>;
619 def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
623 def : InstRW<[WriteShift],
624 (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>;
626 def : InstRW<[WriteShift],
627 (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>;
631 def : InstRW<[ZnWriteALULat2], (instregex "(LZCNT|TZCNT)(16|32|64)rr")>;
633 def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "(LZCNT|TZCNT)(16|32|64)rm")>;
635 //-- Misc instructions --//
637 def ZnWriteCMPXCHG : SchedWriteRes<[ZnAGU, ZnALU]> {
641 def : InstRW<[ZnWriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>;
644 def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
645 let NumMicroOps = 18;
647 def : InstRW<[ZnWriteCMPXCHG8B], (instregex "CMPXCHG8B")>;
649 def : InstRW<[WriteMicrocoded], (instregex "CMPXCHG16B")>;
652 def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> {
656 def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>;
// The miscellaneous and string instructions below are all bound to
// WriteMicrocoded, which this model defines with Latency = 100 and no
// execution resources.
659 def : InstRW<[WriteMicrocoded], (instregex "PAUSE")>;
662 def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
665 def : InstRW<[WriteMicrocoded], (instregex "RDPMC")>;
668 def : InstRW<[WriteMicrocoded], (instregex "RDRAND(16|32|64)r")>;
671 def : InstRW<[WriteMicrocoded], (instregex "XGETBV")>;
673 //-- String instructions --//
// CMPS, LODS, MOVS, SCAS and STOS in their B/W/L/Q forms.
675 def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
678 def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
681 def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
684 def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
687 def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
690 def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
// XADD with a memory operand (rm form).
693 def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
695 //=== Floating Point x87 Instructions ===//
696 //-- Move instructions --//
698 def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]> ;
700 def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
707 def : InstRW<[ZnWriteFLDr], (instregex "LD_Frr")>;
710 def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
713 def : InstRW<[ZnWriteLD_F80m], (instregex "LD_F80m")>;
716 def : InstRW<[WriteMicrocoded], (instregex "FBLDm")>;
720 def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
723 def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
726 def : InstRW<[ZnWriteST_FP80m], (instregex "ST_FP80m")>;
730 def : InstRW<[WriteMicrocoded], (instregex "FBSTPm")>;
732 def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
735 def : InstRW<[ZnWriteFXCH], (instregex "XCH_F")>;
738 def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> {
742 def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;
745 def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> {
748 def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
750 def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> {
754 def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> {
759 def : InstRW<[ZnWriteFPU13], (instregex "LD_F0")>;
762 def : InstRW<[ZnWriteFPU3], (instregex "LD_F1")>;
// x87 constant loads FLDPI, FLDL2T/FLDL2E and FLDLG2/FLDLN2 all use
// ZnWriteFPU3. Every opcode family must be its own comma-separated pattern:
// TableGen concatenates adjacent string literals, so a missing comma fuses
// two patterns into a single regex that matches no instruction.
765 def : InstRW<[ZnWriteFPU3], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>;
767 def : InstRW<[WriteMicrocoded], (instregex "CMOV(B|BE|E|P|NB|NBE|NE|NP)_F")>;
771 def : InstRW<[WriteMicrocoded], (instregex "FNSTSW16r")>;
774 def : InstRW<[WriteMicrocoded], (instregex "FNSTSWm")>;
777 def : InstRW<[WriteMicrocoded], (instregex "FLDCW16m")>;
780 def : InstRW<[WriteMicrocoded], (instregex "FNSTCW16m")>;
783 def : InstRW<[ZnWriteFPU3], (instregex "FINCSTP", "FDECSTP")>;
786 def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
789 def : InstRW<[WriteMicrocoded], (instregex "FSAVEm")>;
792 def : InstRW<[WriteMicrocoded], (instregex "FRSTORm")>;
794 //-- Arithmetic instructions --//
796 def ZnWriteFPU3Lat2 : SchedWriteRes<[ZnFPU3]> {
800 def ZnWriteFPU3Lat2Ld : SchedWriteRes<[ZnAGU, ZnFPU3]> {
804 def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ;
806 def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ;
808 def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> {
813 def : InstRW<[ZnWriteFPU3Lat2], (instregex "ABS_F")>;
816 def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;
820 def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr",
823 def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>;
827 def : InstRW<[ZnWriteFPU0Lat1], (instregex "FCOMPP", "UCOM_FPPr")>;
829 def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]>
834 // FCOMI(P) FUCOMI(P).
836 def : InstRW<[ZnWriteFPU02], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr",
839 def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]>
843 let ResourceCycles = [1,3];
847 def : InstRW<[ZnWriteFPU03], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>;
850 def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;
853 def : InstRW<[ZnWriteFPU3Lat1], (instregex "FXAM")>;
856 def : InstRW<[WriteMicrocoded], (instregex "FPREM")>;
859 def : InstRW<[WriteMicrocoded], (instregex "FPREM1")>;
862 def : InstRW<[WriteMicrocoded], (instregex "FRNDINT")>;
865 def : InstRW<[WriteMicrocoded], (instregex "FSCALE")>;
868 def : InstRW<[WriteMicrocoded], (instregex "FXTRACT")>;
871 def : InstRW<[ZnWriteFPU0Lat1], (instregex "FNOP")>;
874 def : InstRW<[ZnWriteFPU0Lat1], (instregex "WAIT")>;
877 def : InstRW<[WriteMicrocoded], (instregex "FNCLEX")>;
880 def : InstRW<[WriteMicrocoded], (instregex "FNINIT")>;
882 //=== Integer MMX and XMM Instructions ===//
883 //-- Move instructions --//
885 // Moves from GPR to FPR incur a penalty
886 def ZnWriteFPU2 : SchedWriteRes<[ZnFPU2]> {
890 // Move to ALU doesn't incur penalty
891 def ZnWriteToALU2 : SchedWriteRes<[ZnFPU2]> {
895 def ZnWriteFPU : SchedWriteRes<[ZnFPU]>;
896 def ZnWriteFPUY : SchedWriteRes<[ZnFPU]> {
903 def : InstRW<[ZnWriteToALU2], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr",
904 "VMOVPDI2DIrr", "MOVPDI2DIrr")>;
907 def : InstRW<[ZnWriteFPU2], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr",
908 "VMOVDI2PDIrr", "MOVDI2PDIrr")>;
912 def : InstRW<[ZnWriteToALU2], (instregex "VMOVPQIto64rr")>;
915 def : InstRW<[ZnWriteFPU2], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>;
918 def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>;
922 def : InstRW<[ZnWriteFPU], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr",
923 "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV")>;
926 def : InstRW<[ZnWriteFPUY], (instregex "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>;
929 def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVDQ2Qrr")>;
932 def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ2DQrr")>;
936 def ZnWriteFPU12 : SchedWriteRes<[ZnFPU12]> ;
937 def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> {
940 def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ;
942 def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr",
943 "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>;
944 def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm",
945 "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>;
947 // VPMOVSX/ZX BW BD BQ DW DQ.
949 def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>;
951 def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
952 def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
955 def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
959 def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
963 def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> {
970 def : InstRW<[ZnWriteFPU013], (instregex "(V?)PBLENDWrri")>;
972 def : InstRW<[ZnWriteFPU013Y], (instregex "(V?)PBLENDWYrri")>;
975 def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
977 def : InstRW<[ZnWriteFPU013LdY], (instregex "(V?)PBLENDWYrmi")>;
979 def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ;
980 def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> {
986 def : InstRW<[ZnWriteFPU01], (instregex "VPBLENDDrri")>;
988 def : InstRW<[ZnWriteFPU01Y], (instregex "VPBLENDDYrri")>;
991 def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
994 let ResourceCycles = [1, 2];
996 def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
999 let ResourceCycles = [1, 3];
1001 def : InstRW<[ZnWriteFPU01Op2], (instregex "VPBLENDDrmi")>;
1002 def : InstRW<[ZnWriteFPU01Op2Y], (instregex "VPBLENDDYrmi")>;
1005 def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
1008 def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
1012 def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>;
1013 def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>;
1015 def : InstRW<[WriteMicrocoded],
1016 (instregex "VPMASKMOVD(Y?)rm")>;
1018 def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
1021 def ZnWritePMOVMSKB : SchedWriteRes<[ZnFPU2]> {
1022 let NumMicroOps = 2;
1024 def ZnWritePMOVMSKBY : SchedWriteRes<[ZnFPU2]> {
1027 def : InstRW<[ZnWritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKBrr")>;
1028 def : InstRW<[ZnWritePMOVMSKBY], (instregex "(V|MMX_)?PMOVMSKBYrr")>;
1032 def ZnWritePEXTRr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
1034 let ResourceCycles = [1, 2];
1036 def : InstRW<[ZnWritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>;
1038 def ZnWritePEXTRm : SchedWriteRes<[ZnAGU, ZnFPU12, ZnFPU2]> {
1040 let NumMicroOps = 2;
1041 let ResourceCycles = [1, 2, 3];
1044 def : InstRW<[ZnWritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>;
1048 def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
1050 let NumMicroOps = 2;
1051 let ResourceCycles = [1, 2];
1053 def : InstRW<[ZnWriteVPBROADCAST128Ld],
1054 (instregex "VPBROADCAST(B|W)rm")>;
1057 def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
1059 let NumMicroOps = 2;
1060 let ResourceCycles = [1, 2];
1062 def : InstRW<[ZnWriteVPBROADCAST256Ld],
1063 (instregex "VPBROADCAST(B|W)Yrm")>;
1066 def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
1068 //-- Arithmetic instructions --//
1071 // PHADD|PHSUB (S) W/D.
1072 def : InstRW<[WriteMicrocoded], (instregex "MMX_PHADD(W?)r(r|m)64",
1073 "MMX_PHADDSWr(r|m)64",
1074 "MMX_PHSUB(W|D)r(r|m)64",
1076 "(V?)PH(ADD|SUB)(W|D)(Y?)r(r|m)",
1077 "(V?)PH(ADD|SUB)SWr(r|m)(256)?")>;
1081 def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
1082 def : InstRW<[ZnWritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
1085 def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
1089 def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
1091 let NumMicroOps = 2;
1092 let ResourceCycles = [1,2];
1094 def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
1095 def : InstRW<[ZnWritePCMPGTQYm], (instregex "(V?)PCMPGTQYrm")>;
1099 def ZnWritePMULLDr : SchedWriteRes<[ZnFPU0]> {
1103 def ZnWritePMULLDYr : SchedWriteRes<[ZnFPU0]> {
1105 let ResourceCycles = [2];
1107 def : InstRW<[ZnWritePMULLDr], (instregex "(V?)PMULLDrr")>;
1108 def : InstRW<[ZnWritePMULLDYr], (instregex "(V?)PMULLDYrr")>;
1111 def ZnWritePMULLDm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
1113 let NumMicroOps = 2;
1116 def ZnWritePMULLDYm : SchedWriteRes<[ZnAGU, ZnFPU0]> {
1118 let NumMicroOps = 2;
1119 let ResourceCycles = [1, 2];
1121 def : InstRW<[ZnWritePMULLDm], (instregex "(V?)PMULLDrm")>;
1122 def : InstRW<[ZnWritePMULLDYm], (instregex "(V?)PMULLDYrm")>;
1124 //-- Logic instructions --//
1128 def ZnWritePTESTr : SchedWriteRes<[ZnFPU12]> {
1129 let ResourceCycles = [2];
1131 def : InstRW<[ZnWritePTESTr], (instregex "(V?)PTEST(Y?)rr")>;
1134 def ZnWritePTESTm : SchedWriteRes<[ZnAGU, ZnFPU12]> {
1136 let NumMicroOps = 2;
1137 let ResourceCycles = [1, 2];
1139 def : InstRW<[ZnWritePTESTm], (instregex "(V?)PTEST(Y?)rm")>;
1141 // PSLL,PSRL,PSRA W/D/Q.
1143 def ZnWritePShift : SchedWriteRes<[ZnFPU2]> ;
1144 def ZnWritePShiftY : SchedWriteRes<[ZnFPU2]> {
1147 def ZnWritePShiftLd : SchedWriteRes<[ZnAGU,ZnFPU2]> {
1150 def ZnWritePShiftYLd : SchedWriteRes<[ZnAGU, ZnFPU2]> {
1153 def : InstRW<[ZnWritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rr")>;
1154 def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrr")>;
1156 def : InstRW<[ZnWritePShiftLd], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)rm")>;
1157 def : InstRW<[ZnWritePShiftYLd], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)Yrm")>;
1160 def : InstRW<[ZnWritePShift], (instregex "(V?)PS(R|L)LDQri")>;
1161 def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
1163 //=== Floating Point XMM and YMM Instructions ===//
1164 //-- Move instructions --//
1168 def ZnWriteMOVMSKPr : SchedWriteRes<[ZnFPU2]> ;
1169 def : InstRW<[ZnWriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)(Y?)rr")>;
1172 def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rr")>;
1173 def : InstRW<[WriteMicrocoded], (instregex "VPERM2F128rm")>;
1176 def ZnWriteFPU01Lat3 : SchedWriteRes<[ZnFPU013]> {
1179 def ZnWriteFPU01Lat3Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
1181 let NumMicroOps = 2;
1182 let ResourceCycles = [1, 2];
1184 def : InstRW<[ZnWriteFPU01Lat3], (instregex "BLENDVP(S|D)rr0")>;
1185 def : InstRW<[ZnWriteFPU01Lat3Ld, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>;
1187 def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
1188 let NumMicroOps = 2;
1192 def : InstRW<[ZnWriteBROADCAST], (instregex "VBROADCASTF128")>;
1196 def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
1198 let NumMicroOps = 2;
1199 let ResourceCycles = [1, 2];
1201 def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
1203 def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
1205 let NumMicroOps = 2;
1206 let ResourceCycles = [5, 1, 2];
1209 def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
1213 def : InstRW<[ZnWriteFPU013], (instregex "VEXTRACTF128rr")>;
1216 def : InstRW<[ZnWriteFPU013m], (instregex "VEXTRACTF128mr")>;
1218 def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> {
1220 let ResourceCycles = [2];
1222 def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
1224 let NumMicroOps = 2;
1225 let ResourceCycles = [1, 2];
1229 def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>;
1230 def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>;
1234 def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1238 def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1240 let NumMicroOps = 2;
1241 let ResourceCycles = [1, 2];
1243 def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1246 def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>;
1247 def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>;
1248 def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>;
1251 def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> {
1253 let NumMicroOps = 2;
1254 let ResourceCycles = [1, 2];
1256 def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>;
// Floating-point gathers are bound to WriteMicrocoded. Each index/element
// combination (DPS, QPS, DPD, QPD) is listed for its plain "rm" opcode and
// for its "Yrm" opcode.
1260 def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>;
1262 def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSYrm")>;
1266 def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSrm")>;
1269 def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPSYrm")>;
1273 def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDrm")>;
1276 def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPDYrm")>;
1280 def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDrm")>;
1283 def : InstRW<[WriteMicrocoded], (instregex "VGATHERQPDYrm")>;
1285 //-- Conversion instructions --//
// CVTPD2PS, register form: occupies ZnFPU3.
1286 def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
1291 def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(V?)CVTPD2PSrr")>;
// CVTPD2PS, load form: two micro-ops. ResourceCycles entries pair up
// positionally with the resource list (1 on ZnAGU, 2 on ZnFPU03).
// NOTE(review): the register form above uses ZnFPU3 while this load form
// uses ZnFPU03 -- confirm the difference is intended.
1293 def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
1295 let NumMicroOps = 2;
1296 let ResourceCycles = [1,2];
1299 def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(V?)CVTPD2PS(X?)rm")>;
// Y-suffixed CVTPD2PS, register form: ZnFPU3.
1302 def ZnWriteCVTPD2PSYr : SchedWriteRes<[ZnFPU3]> {
1305 def : InstRW<[ZnWriteCVTPD2PSYr], (instregex "(V?)CVTPD2PSYrr")>;
// Y-suffixed CVTPD2PS, load form: ZnAGU plus ZnFPU3.
1308 def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1311 def : InstRW<[ZnWriteCVTPD2PSYLd], (instregex "(V?)CVTPD2PSYrm")>;
1315 // CVTSD2SS reuses the CVTPD2PS write classes (ZnWriteCVTPD2PSr / ZnWriteCVTPD2PSLd).
1316 def : InstRW<[ZnWriteCVTPD2PSr], (instregex "(Int_)?(V)?CVTSD2SSrr")>;
1319 def : InstRW<[ZnWriteCVTPD2PSLd], (instregex "(Int_)?(V)?CVTSD2SSrm")>;
// CVTPS2PD, register form: ZnFPU3.
1323 def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
1326 def : InstRW<[ZnWriteCVTPS2PDr], (instregex "(V?)CVTPS2PDrr")>;
// CVTPS2PD, load form: ZnAGU plus ZnFPU3, two micro-ops. The pattern's
// optional Y means the Y-suffixed load shares this class.
1330 def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1332 let NumMicroOps = 2;
1334 def : InstRW<[ZnWriteCVTPS2PDLd], (instregex "(V?)CVTPS2PD(Y?)rm")>;
// VCVTPS2PDY, register form: its own class on ZnFPU3.
1337 def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
1340 def : InstRW<[ZnWriteVCVTPS2PDY], (instregex "VCVTPS2PDYrr")>;
// CVTSS2SD, register form: ZnFPU3.
1344 def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
1347 def : InstRW<[ZnWriteCVTSS2SDr], (instregex "(Int_)?(V?)CVTSS2SDrr")>;
// CVTSS2SD, load form: two micro-ops; 1 cycle on ZnAGU, 2 on ZnFPU3.
1350 def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1352 let NumMicroOps = 2;
1353 let ResourceCycles = [1, 2];
1355 def : InstRW<[ZnWriteCVTSS2SDLd], (instregex "(Int_)?(V?)CVTSS2SDrm")>;
// CVTDQ2PD, register forms (with and without the Y suffix) share one class
// that uses both ZnFPU12 and ZnFPU3.
1357 def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
1362 def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
1366 def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "VCVTDQ2PDYrr")>;
// CVT(T)PD2DQ, register form: ZnFPU12 plus ZnFPU3.
1368 def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> {
// CVT(T)PD2DQ register form: bind to ZnWriteCVTPD2DQr, the write class
// defined for this instruction, instead of the CVTDQ2PD class.
1373 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
// CVT(T)PD2DQ, load form: ZnAGU plus ZnFPU12 and ZnFPU3, two micro-ops.
1375 def ZnWriteCVTPD2DQLd: SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> {
1377 let NumMicroOps = 2;
1380 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
1381 // Y-suffixed VCVT(T)PD2DQ forms reuse the xmm write classes.
1383 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
1385 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
// NOTE(review): when the optional (64) is absent this pattern names the same
// VCVT(T)PD2DQrm opcodes as the "(V?)CVT(T?)PD2DQrm" entry above, with the
// same write class -- confirm whether this entry is redundant.
1386 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQ(64)?rm")>;
// MMX CVT(T)PS2PI: ZnFPU3.
1388 def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> {
1393 def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIirr")>;
// MMX CVTPI2PD is bound to the CVTPS2PD register write class.
1397 def : InstRW<[ZnWriteCVTPS2PDr], (instregex "MMX_CVT(T?)PI2PDirr")>;
// MMX CVT(T)PD2PI is bound to the CVTPS2PI write class.
1401 def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIirr")>;
// CVTSI2SS, register source: ZnFPU3.
1403 def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> {
1408 def : InstRW<[ZnWriteCVSTSI2SSr], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>;
1410 // CVT(T)SS2SI register form: same write class as CVTPD2DQr
1413 def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>;
1414 // CVT(T)SS2SI load form: same write class as the CVTPD2DQ load form
1416 def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>;
// CVTSI2SD, register source: ZnFPU013 plus ZnFPU3.
1418 def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> {
// CVTSI2SD, register source. The pattern must name CVTSI2SD: the CVTSI2SS
// register forms already have their own InstRW entry (ZnWriteCVSTSI2SSr),
// so matching CVTSI2SS here would leave CVTSI2SD without this write class.
1423 def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>;
// CVT(T)SD2SI (non-VEX names), register form: ZnFPU3 plus ZnFPU2.
1426 def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]> {
// CVT(T)SD2SI (non-VEX names), load form: adds ZnAGU to the register form's
// resources.
1429 def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> {
1434 def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(Int_)?CVT(T?)SD2SI(64)?rr")>;
1436 def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(Int_)?CVT(T?)SD2SI(64)?rm")>;
// V-prefixed counterparts of the two classes above: ZnFPU3 only for the
// register form, ZnFPU3 plus ZnAGU for the load form.
1439 def ZnWriteVCVSTSI2SIr: SchedWriteRes<[ZnFPU3]> {
1442 def ZnWriteVCVSTSI2SILd: SchedWriteRes<[ZnFPU3, ZnAGU]> {
// VCVT(T)SD2SI: bind the V-prefixed opcodes to the V-prefixed write classes
// (ZnWriteVCVSTSI2SIr / ZnWriteVCVSTSI2SILd) declared for them, rather than
// to the classes used by the non-VEX CVT(T)SD2SI entries.
1447 def : InstRW<[ZnWriteVCVSTSI2SIr], (instregex "(Int_)?VCVT(T?)SD2SI(64)?rr")>;
1449 def : InstRW<[ZnWriteVCVSTSI2SILd], (instregex "(Int_)?VCVT(T?)SD2SI(64)?rm")>;
// Half-precision conversions (VCVTPS2PH / VCVTPH2PS, with and without the Y
// suffix, register and memory forms) are all mapped to WriteMicrocoded.
1453 def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)rr")>;
1455 def : InstRW<[WriteMicrocoded], (instregex "VCVTPS2PH(Y?)mr")>;
1459 def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rr")>;
1461 def : InstRW<[WriteMicrocoded], (instregex "VCVTPH2PS(Y?)rm")>;
1463 //-- SSE4A instructions --//
// EXTRQ: uses ZnFPU12 and ZnFPU2. The pattern has no suffix, so every opcode
// whose name starts with EXTRQ is covered.
1465 def ZnWriteEXTRQ: SchedWriteRes<[ZnFPU12, ZnFPU2]> {
1468 def : InstRW<[ZnWriteEXTRQ], (instregex "EXTRQ")>;
// INSERTQ: uses ZnFPU03 and ZnFPU1; likewise matched by name prefix.
1471 def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
1474 def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
// MOVNTSS / MOVNTSD: ZnAGU plus ZnFPU2.
1477 def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
1480 def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
1482 //-- SHA instructions --//
// SHA256MSG2 (register and memory forms) is mapped to WriteMicrocoded.
1484 def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
1486 // SHA1MSG1, SHA256MSG1
// Register form: ZnFPU12 held for 2 cycles.
1488 def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
1490 let ResourceCycles = [2];
1492 def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
// Load form: 1 cycle on ZnAGU, 2 cycles on ZnFPU12.
1494 def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
1496 let ResourceCycles = [1,2];
1498 def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
// SHA1MSG2, register form: ZnFPU12 with no field overrides.
1502 def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
1503 def : InstRW<[ZnWriteSHA1MSG2r], (instregex "SHA1MSG2rr")>;
// SHA1MSG2, load form: ZnAGU plus ZnFPU12.
1505 def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
1508 def : InstRW<[ZnWriteSHA1MSG2Ld], (instregex "SHA1MSG2rm")>;
// SHA1NEXTE, register form: ZnFPU1 with no field overrides.
1512 def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
1513 def : InstRW<[ZnWriteSHA1NEXTEr], (instregex "SHA1NEXTErr")>;
// SHA1NEXTE, load form: ZnAGU plus ZnFPU1.
1515 def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
1518 def : InstRW<[ZnWriteSHA1NEXTELd], (instregex "SHA1NEXTErm")>;
// SHA1RNDS4, register form: ZnFPU1.
1522 def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
1525 def : InstRW<[ZnWriteSHA1RNDS4r], (instregex "SHA1RNDS4rr")>;
// SHA1RNDS4, load form: ZnAGU plus ZnFPU1.
1527 def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
1530 def : InstRW<[ZnWriteSHA1RNDS4Ld], (instregex "SHA1RNDS4rm")>;
// SHA256RNDS2, register form: ZnFPU1.
1534 def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
1537 def : InstRW<[ZnWriteSHA256RNDS2r], (instregex "SHA256RNDS2rr")>;
// SHA256RNDS2, load form: ZnAGU plus ZnFPU1.
1539 def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
1542 def : InstRW<[ZnWriteSHA256RNDS2Ld], (instregex "SHA256RNDS2rm")>;
1544 //-- Arithmetic instructions --//
// Horizontal add/subtract (PS/PD, optional Y, register or memory source) is
// mapped to WriteMicrocoded.
1547 def : InstRW<[WriteMicrocoded], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)r(r|m)")>;
1549 // MUL SS/SD PS/PD.
// Register forms: ZnFPU01, with a separate class for the Y-suffixed opcodes.
1551 def ZnWriteMULr : SchedWriteRes<[ZnFPU01]> {
1555 def ZnWriteMULYr : SchedWriteRes<[ZnFPU01]> {
1558 def : InstRW<[ZnWriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>;
1559 def : InstRW<[ZnWriteMULYr], (instregex "(V?)MUL(P|S)(S|D)Yrr")>;
// Load forms: ZnAGU plus ZnFPU01, two micro-ops.
1562 def ZnWriteMULLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1564 let NumMicroOps = 2;
1566 def : InstRW<[ZnWriteMULLd], (instregex "(V?)MUL(P|S)(S|D)rm")>;
1569 def ZnWriteMULYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1571 let NumMicroOps = 2;
1573 def : InstRW<[ZnWriteMULYLd], (instregex "(V?)MUL(P|S)(S|D)Yrm")>;
// VDIVPSY, register form: ZnFPU3 held for 12 cycles.
1577 def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
1579 let ResourceCycles = [12];
1581 def : InstRW<[ZnWriteVDIVPSYr], (instregex "VDIVPSYrr")>;
// VDIVPSY, load form: two micro-ops; 1 cycle on ZnAGU, 19 on ZnFPU3.
1584 def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1586 let NumMicroOps = 2;
1587 let ResourceCycles = [1, 19];
1589 def : InstRW<[ZnWriteVDIVPSYLd], (instregex "VDIVPSYrm")>;
// VDIVPDY, register form: ZnFPU3 held for 15 cycles.
1593 def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
1595 let ResourceCycles = [15];
1597 def : InstRW<[ZnWriteVDIVPDY], (instregex "VDIVPDYrr")>;
// VDIVPDY, load form: two micro-ops; 1 cycle on ZnAGU, 22 on ZnFPU3.
1600 def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1602 let NumMicroOps = 2;
1603 let ResourceCycles = [1,22];
1605 def : InstRW<[ZnWriteVDIVPDYLd], (instregex "VDIVPDYrm")>;
// VRCPPSY, register form: ZnFPU01.
1609 def ZnWriteVRCPPSr : SchedWriteRes<[ZnFPU01]> {
1612 def : InstRW<[ZnWriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>;
// VRCPPSY, load form: ZnAGU plus ZnFPU01, declared as three micro-ops.
1615 def ZnWriteVRCPPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1617 let NumMicroOps = 3;
1619 def : InstRW<[ZnWriteVRCPPSLd], (instregex "VRCPPSYm(_Int)?")>;
1621 // ROUND SS/SD PS/PD.
// Register forms: ZnFPU3.
1623 def ZnWriteROUNDr : SchedWriteRes<[ZnFPU3]> {
1626 def : InstRW<[ZnWriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>;
// Fused multiply-add family, register forms: ZnFPU03.
1630 def ZnWriteFMADDr : SchedWriteRes<[ZnFPU03]> {
// Register forms of the fused multiply-add family bound to ZnWriteFMADDr.
// The packed three-operand pattern places the (213|132|231) operand-order
// digits before P(S|D), matching the order already used by the scalar
// pattern on the next line; with the digits after P(S|D) the pattern cannot
// match opcode names of the VFMADD213PSr / VFMADD213PSYr shape.
1633 def : InstRW<[ZnWriteFMADDr],
1635 "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?r",
1636 "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)r",
1637 "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
1638 "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
// Fused multiply-add family, memory forms: ZnAGU plus ZnFPU03, two micro-ops.
1641 def ZnWriteFMADDm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
1643 let NumMicroOps = 2;
// Patterns cover the packed and scalar 132/213/231 forms with an m suffix
// and the "4"-suffixed forms with rm or mr operands.
1645 def : InstRW<[ZnWriteFMADDm],
1647 "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?m",
1648 "VF(N?)M(ADD|SUB)(132|231|213)S(S|D)m",
1649 "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
1650 "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;
// ROUND SS/SD PS/PD, memory forms: ZnAGU plus ZnFPU3, two micro-ops.
1653 def ZnWriteROUNDm : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1655 let NumMicroOps = 2;
1657 def : InstRW<[ZnWriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>;
// Dot-product instructions (DPPS with optional Y, DPPD; register and memory
// sources) are mapped to WriteMicrocoded.
1661 def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rri")>;
1664 def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPS(Y?)rmi")>;
1668 def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrri")>;
1671 def : InstRW<[WriteMicrocoded], (instregex "(V?)DPPDrmi")>;
// VSQRTPSY, register form: ZnFPU3 held for 28 cycles.
1675 def ZnWriteVSQRTPSYr : SchedWriteRes<[ZnFPU3]> {
1677 let ResourceCycles = [28];
1679 def : InstRW<[ZnWriteVSQRTPSYr], (instregex "VSQRTPSYr")>;
// VSQRTPSY, load form: two micro-ops; 1 cycle on ZnAGU, 35 on ZnFPU3.
1682 def ZnWriteVSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1684 let ResourceCycles = [1,35];
1685 let NumMicroOps = 2;
1687 def : InstRW<[ZnWriteVSQRTPSYLd], (instregex "VSQRTPSYm")>;
// VSQRTPDY, register form: ZnFPU3 held for 40 cycles.
1691 def ZnWriteVSQRTPDYr : SchedWriteRes<[ZnFPU3]> {
1693 let ResourceCycles = [40];
1695 def : InstRW<[ZnWriteVSQRTPDYr], (instregex "VSQRTPDYr")>;
// VSQRTPDY, load form: two micro-ops; 1 cycle on ZnAGU, 47 on ZnFPU3.
1698 def ZnWriteVSQRTPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
1700 let NumMicroOps = 2;
1701 let ResourceCycles = [1,47];
1703 def : InstRW<[ZnWriteVSQRTPDYLd], (instregex "VSQRTPDYm")>;
// RSQRTSS, register form: ZnFPU02.
1707 def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
1710 def : InstRW<[ZnWriteRSQRTSSr], (instregex "(V?)RSQRTSS(Y?)r(_Int)?")>;
// RSQRTPS, register form: ZnFPU01.
// NOTE(review): because of the optional (Y?) this pattern also names
// VRSQRTPSYr, which has its own entry (ZnWriteRSQRTPSYr) further down --
// confirm which binding is meant to apply to the Y-suffixed opcode.
1714 def ZnWriteRSQRTPSr : SchedWriteRes<[ZnFPU01]> {
1717 def : InstRW<[ZnWriteRSQRTPSr], (instregex "(V?)RSQRTPS(Y?)r(_Int)?")>;
// RSQRTSS, load form: two micro-ops; 1 cycle on ZnAGU, 2 on ZnFPU02.
1721 def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
1723 let NumMicroOps = 2;
1724 let ResourceCycles = [1,2];
1726 def : InstRW<[ZnWriteRSQRTSSLd], (instregex "(V?)RSQRTSSm(_Int)?")>;
// RSQRTPS, load form: ZnAGU plus ZnFPU01, two micro-ops.
1729 def ZnWriteRSQRTPSLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1731 let NumMicroOps = 2;
1733 def : InstRW<[ZnWriteRSQRTPSLd], (instregex "(V?)RSQRTPSm(_Int)?")>;
// VRSQRTPSY, register form: two micro-ops, ZnFPU01 held for 2 cycles.
1737 def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
1739 let NumMicroOps = 2;
1740 let ResourceCycles = [2];
1742 def : InstRW<[ZnWriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>;
// VRSQRTPSY, load form: ZnAGU plus ZnFPU01, two micro-ops.
1745 def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> {
1747 let NumMicroOps = 2;
1749 def : InstRW<[ZnWriteRSQRTPSYLd], (instregex "VRSQRTPSYm(_Int)?")>;
1751 //-- Logic instructions --//
1753 // AND, ANDN, OR, XOR PS/PD.
// Register forms use the generic WriteVecLogic class; memory-source forms
// use WriteVecLogicLd.
1755 def : InstRW<[WriteVecLogic], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>;
1757 def : InstRW<[WriteVecLogicLd],
1758 (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>;
1760 //-- Other instructions --//
// VZEROUPPER, VZEROALL and the MXCSR load/store instructions are all mapped
// to WriteMicrocoded.
1763 def : InstRW<[WriteMicrocoded], (instregex "VZEROUPPER")>;
1766 def : InstRW<[WriteMicrocoded], (instregex "VZEROALL")>;
1769 def : InstRW<[WriteMicrocoded], (instregex "(V)?LDMXCSR")>;
1772 def : InstRW<[WriteMicrocoded], (instregex "(V)?STMXCSR")>;