1 //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the machine model for Znver1 to support instruction
11 // scheduling and other instruction cost heuristics.
13 //===----------------------------------------------------------------------===//
15 def Znver1Model : SchedMachineModel {
16 // Zen can decode 4 instructions per cycle.
18 // MicroOpBufferSize is defined based on the size of the reorder buffer.
19 let MicroOpBufferSize = 192;
21 let MispredictPenalty = 17;
23 let PostRAScheduler = 1;
25 // FIXME: This setting is required because the model is incomplete:
26 // not all instructions have been covered yet.
27 // CompleteModel is therefore reset to 0 to state
28 // that the model is incomplete.
29 let CompleteModel = 0;
32 let SchedModel = Znver1Model in {
34 // Zen can issue micro-ops to 10 different units in one cycle.
36 // * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
37 // * Two AGU units (ZnAGU0, ZnAGU1)
38 // * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
39 // The AGUs feed the load/store queues at two loads and one store per cycle.
41 // The four integer ALU units, each declared as a separate ProcResource<1>.
42 def ZnALU0 : ProcResource<1>;
43 def ZnALU1 : ProcResource<1>;
44 def ZnALU2 : ProcResource<1>;
45 def ZnALU3 : ProcResource<1>;
47 // The two AGU units, each declared as a separate ProcResource<1>.
48 def ZnAGU0 : ProcResource<1>;
49 def ZnAGU1 : ProcResource<1>;
51 // The four FPU units, each declared as a separate ProcResource<1>.
52 def ZnFPU0 : ProcResource<1>;
53 def ZnFPU1 : ProcResource<1>;
54 def ZnFPU2 : ProcResource<1>;
55 def ZnFPU3 : ProcResource<1>;
58 def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>; // group of all four FPU units
59 def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>; // digit suffix lists the member units (here 0, 1 and 3)
60 def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
61 def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
62 def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
63 def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
64 def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
65 def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
67 // The groupings of the units are defined below.
68 // Micro-ops that are issued to multiple units are handled through these groups.
71 // ZnALU03 - grouping of ALU0 and ALU3
72 def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
74 // 56-entry (14 x 4 entries) integer scheduler
75 def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
79 // 28-entry (14 x 2) AGU group. The AGUs cannot be used for all ALU operations,
80 // but they are relevant for some instructions.
81 def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
85 // Integer multiplication is issued on ALU1.
86 def ZnMultiplier : ProcResource<1>;
88 // Integer division is issued on ALU2.
89 def ZnDivider : ProcResource<1>;
91 // A 4-cycle load-to-use latency is captured here.
92 def : ReadAdvance<ReadAfterLd, 4>;
94 // (A folded load is an instruction that both loads and performs some operation.)
95 // Example: ADDPD xmm, [mem] -> this instruction has two micro-ops.
96 // Instructions with folded loads are usually micro-fused, so they only appear
100 // This multiclass defines the register and folded-load variants of a write for the integer units.
101 multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
102 ProcResourceKind ExePort,
104 // Register variant takes 1 cycle on the execution port; its latency is Lat.
105 def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
107 // Memory variant also uses a cycle on ZnAGU and
108 // adds 4 cycles to the latency.
109 def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
110 let Latency = !add(Lat, 4);
114 // This multiclass defines the register and folded-load variants of a write for the floating point units.
115 multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
116 ProcResourceKind ExePort,
118 // Register variant takes 1 cycle on the execution port; its latency is Lat.
119 def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
121 // Memory variant also uses a cycle on ZnAGU and
122 // adds 7 cycles to the latency.
123 def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
124 let Latency = !add(Lat, 7);
128 // WriteRMW is set for instructions that perform a memory write
129 // operation in codegen.
130 def : WriteRes<WriteRMW, [ZnAGU]>;
132 def : WriteRes<WriteStore, [ZnAGU]>;
133 def : WriteRes<WriteMove, [ZnALU]>;
134 def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
136 def : WriteRes<WriteZero, []>; // no resources listed
137 def : WriteRes<WriteLEA, [ZnALU]>;
138 defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
139 defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
140 defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
143 def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> { // register form: ALU2 plus the divider
145 let ResourceCycles = [1, 41];
148 def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> { // load form: additionally lists ZnAGU
150 let ResourceCycles = [1, 4, 41];
154 def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
157 def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
161 def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> { // NOTE(review): unlike WriteIDivLd, no ZnAGU is listed for this load form - confirm this is intended
165 // Floating-point operations (register and folded-load forms via ZnWriteResFpuPair)
166 defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
167 defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
168 defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
169 defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
170 defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
171 defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
172 defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
173 defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
174 defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
175 defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
176 defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
177 defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
178 defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
179 defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
181 // Vector integer operations that use the FPU units
182 defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
183 defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
184 defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
185 defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
186 defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
187 defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
188 defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
189 defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
191 // Vector shift operations
192 defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
195 defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>; // AES write classes start here
196 defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
197 defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
199 def : WriteRes<WriteFence, [ZnAGU]>;
200 def : WriteRes<WriteNop, []>; // no resources listed
202 // The following instructions, given latency = 100, are microcoded.
203 // The long latency is set so as to block the entire pipeline.
204 defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
206 // Microcoded instructions
207 let Latency = 100 in {
208 def : WriteRes<WriteMicrocoded, []>;
209 def : WriteRes<WriteSystem, []>;
210 def : WriteRes<WriteMPSAD, []>;
211 def : WriteRes<WriteMPSADLd, []>;
212 def : WriteRes<WriteCLMul, []>;
213 def : WriteRes<WriteCLMulLd, []>;
214 def : WriteRes<WritePCmpIStrM, []>;
215 def : WriteRes<WritePCmpIStrMLd, []>;
216 def : WriteRes<WritePCmpEStrI, []>;
217 def : WriteRes<WritePCmpEStrILd, []>;
218 def : WriteRes<WritePCmpEStrM, []>;
219 def : WriteRes<WritePCmpEStrMLd, []>;
220 def : WriteRes<WritePCmpIStrI, []>;
221 def : WriteRes<WritePCmpIStrILd, []>;