//=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for Znver1 to support instruction
// scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// Machine model record for Znver1. Everything inside the
// `let SchedModel = Znver1Model in { ... }` region below is attached to it.
def Znver1Model : SchedMachineModel {
  // Zen can decode 4 instructions per cycle.
  let IssueWidth = 4;
  // MicroOpBufferSize is chosen based on the size of the reorder buffer.
  let MicroOpBufferSize = 192;
  let LoadLatency = 4;
  let MispredictPenalty = 17;
  let HighLatency = 25;
  let PostRAScheduler = 1;

  // FIXME: The model is incomplete: not every instruction has been covered
  // yet. CompleteModel is therefore reset to 0 to say so explicitly.
  let CompleteModel = 0;
}

let SchedModel = Znver1Model in {

// Zen can issue micro-ops to 10 different units in one cycle.
// These are
//  * Four integer ALU units (ZnALU0, ZnALU1, ZnALU2, ZnALU3)
//  * Two AGU units (ZnAGU0, ZnAGU1)
//  * Four FPU units (ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3)
// The AGUs feed the load/store queues at two loads and one store per cycle.

// Four ALU units are defined below
def ZnALU0 : ProcResource<1>;
def ZnALU1 : ProcResource<1>;
def ZnALU2 : ProcResource<1>;
def ZnALU3 : ProcResource<1>;

// Two AGU units are defined below
def ZnAGU0 : ProcResource<1>;
def ZnAGU1 : ProcResource<1>;

// Four FPU units are defined below
def ZnFPU0 : ProcResource<1>;
def ZnFPU1 : ProcResource<1>;
def ZnFPU2 : ProcResource<1>;
def ZnFPU3 : ProcResource<1>;

// FPU grouping
// The digits in each group name list the FPU units that belong to the group;
// ZnFPU (no digits) contains all four.
def ZnFPU     : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]>;
def ZnFPU013  : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
def ZnFPU01   : ProcResGroup<[ZnFPU0, ZnFPU1]>;
def ZnFPU12   : ProcResGroup<[ZnFPU1, ZnFPU2]>;
def ZnFPU13   : ProcResGroup<[ZnFPU1, ZnFPU3]>;
def ZnFPU23   : ProcResGroup<[ZnFPU2, ZnFPU3]>;
def ZnFPU02   : ProcResGroup<[ZnFPU0, ZnFPU2]>;
def ZnFPU03   : ProcResGroup<[ZnFPU0, ZnFPU3]>;

// Below are the groupings of the units.
// Micro-ops that can be issued to more than one unit are modelled this way.

// ALU grouping
// ZnALU03 - 0,3 grouping
def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;

// 56 Entry (14x4 entries) Int Scheduler
def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
  let BufferSize=56;
}

// 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
// but are relevant for some instructions
def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
  let BufferSize=28;
}

// Integer Multiplication issued on ALU1.
def ZnMultiplier : ProcResource<1>;

// Integer division issued on ALU2.
def ZnDivider : ProcResource<1>;

// 4 Cycles load-to use Latency is captured
// NOTE(review): ReadAdvance carries no template arguments in this copy, so
// the "4 cycles" mentioned above is not visible in the code; the arguments
// look like they were lost -- confirm against the authoritative file.
def : ReadAdvance;

// (a folded load is an instruction that loads and does some operation)
// Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
// Instructions with folded loads are usually micro-fused, so they only appear
// as two micro-ops.
//      a. load and
//      b. addpd
// This multiclass is for folded loads for integer units.
// Each instantiation emits two WriteRes records: one with latency Lat and
// one with latency Lat + 4.
// NOTE(review): the header declares no template parameters here although the
// body uses `Lat`, and both WriteRes records lack arguments; presumably the
// parameter/argument lists were dropped from this copy -- TODO confirm.
multiclass ZnWriteResPair {
  // Register variant takes 1-cycle on Execution Port.
  def : WriteRes { let Latency = Lat; }

  // Memory variant also uses a cycle on ZnAGU
  // adds 4 cycles to the latency.
  def : WriteRes {
    let Latency = !add(Lat, 4);
  }
}

// This multiclass is for folded loads for floating point units.
// Same shape as ZnWriteResPair, but the memory variant adds 7 cycles
// instead of 4.
// NOTE(review): as with ZnWriteResPair, the template parameters and the
// WriteRes arguments are not present in this copy -- TODO confirm.
multiclass ZnWriteResFpuPair {
  // Register variant takes 1-cycle on Execution Port.
  def : WriteRes { let Latency = Lat; }

  // Memory variant also uses a cycle on ZnAGU
  // adds 7 cycles to the latency.
  def : WriteRes {
    let Latency = !add(Lat, 7);
  }
}

// NOTE(review): every `def : WriteRes` and `defm : ZnWriteRes*Pair` from here
// to the end of the file appears without template arguments, so which write
// class and which resources each record binds cannot be read from this copy.
// Only the section comments and the explicit Latency / ResourceCycles values
// are reliable; restore the arguments from the authoritative file.

// WriteRMW is set for instructions with Memory write
// operation in codegen
def : WriteRes;

def : WriteRes;
def : WriteRes;
def : WriteRes { let Latency = 8; }

def : WriteRes;
def : WriteRes;
defm : ZnWriteResPair;
defm : ZnWriteResPair;
defm : ZnWriteResPair;

// IDIV
// Each ResourceCycles list has one entry per resource named in the record's
// (missing here) resource list.
def : WriteRes {
  let Latency = 41;
  let ResourceCycles = [1, 41];
}

def : WriteRes {
  let Latency = 45;
  let ResourceCycles = [1, 4, 41];
}

// IMUL
def : WriteRes{
  let Latency = 4;
}
def : WriteRes {
  let Latency = 4;
}

def : WriteRes {
  let Latency = 8;
}

// Floating point operations
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;

// Vector integer operations which use the FPU units
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;

// Vector Shift Operations
defm : ZnWriteResFpuPair;

// AES Instructions.
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;
defm : ZnWriteResFpuPair;

def : WriteRes;
def : WriteRes;

// Following instructions with latency=100 are microcoded.
// We set long latency so as to block the entire pipeline.
defm : ZnWriteResFpuPair;

//Microcoded Instructions
// The enclosing `let Latency = 100 in` applies that latency to every record
// in the braces below.
let Latency = 100 in {
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
  def : WriteRes;
}
} // let SchedModel = Znver1Model