1 //===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines resources required by some P9 instructions. This is part of
11 // the P9 processor model used for instruction scheduling. Not every instruction
12 // is listed here. Instructions in this file belong to itinerary classes that
13 // have instructions with different resource requirements.
15 // The makeup of the P9 CPU is modeled as follows:
16 // - Each CPU is made up of two superslices.
17 // - Each superslice is made up of two slices. Therefore, there are 4 slices
19 // - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
21 // - One CY (Crypto) unit P9_CY_*
22 // - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23 // - Two PM (Permute) units. One on each superslice. P9_PM_*
24 // - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25 // - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26 // - Four DP (Floating Point) units. One on each slice. P9_DP_*
27 // This also includes fixed point multiply add.
28 // - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29 // - Four Load/Store Queues. P9_LS_*
30 // - Each set of instructions will require a number of these resources.
31 //===----------------------------------------------------------------------===//
33 // Two cycle ALU vector operation that uses an entire superslice.
34 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
36 def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
37 DISP_1C, DISP_1C, DISP_1C],
131 // Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
132 // single slice. However, since it is Restricted it requires all 3 dispatches
133 // (DISP) for that superslice.
134 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
142 // Standard Dispatch ALU operation for 3 cycles. Only one slice used.
143 def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C],
163 // Standard Dispatch ALU operation for 2 cycles. Only one slice used.
164 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
182 // Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
183 // single slice. However, since it is Restricted it requires all 3 dispatches
184 // (DISP) for that superslice.
185 def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
205 // Three cycle ALU vector operation that uses an entire superslice.
206 // Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
207 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
208 def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C,
209 DISP_1C, DISP_1C, DISP_1C],
326 // 7 cycle DP vector operation that uses an entire superslice.
327 // Uses both DP units (the even DPE and odd DPO units), two pipelines
328 // (EXECE, EXECO) and all three dispatches (DISP) to the given superslice.
329 def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C,
330 DISP_1C, DISP_1C, DISP_1C],
437 // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
438 // dispatch units for the superslice.
439 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
498 // 7 cycle Restricted DP operation and one 2 cycle ALU operation.
499 // The DP is restricted so we need a full 5 dispatches.
500 def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
501 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
510 // 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
511 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
544 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
545 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
547 def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],
648 // 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
649 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
651 def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
670 // 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
671 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
673 def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
687 // 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
688 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
690 def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
696 // 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
697 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
699 def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C],
705 // 5 Cycle load uses a single slice.
706 def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
718 // 4 Cycle load uses a single slice.
719 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C],
724 // 4 Cycle Restricted load uses a single slice but the dispatch for the whole
726 def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
733 // Cracked Restricted Load instruction.
734 // Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
735 // operations cannot be done at the same time and so their latencies are added.
736 // Full 6 dispatches are required as this is both cracked and restricted.
737 def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
738 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
745 // Cracked Load instruction.
746 // Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
747 // operations cannot be done at the same time and so their latencies are added.
748 // Full 4 dispatches are required as this is a cracked instruction.
749 def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
750 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
761 // Cracked Load that requires the PM resource.
762 // Since the Load and the PM cannot be done at the same time the latencies are
763 // added. Requires 8 cycles.
764 // Since the PM requires the full superslice we need both EXECE, EXECO pipelines
765 // as well as 3 dispatches for the PM. The Load requires the remaining 2
767 def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
768 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
775 // Single slice Restricted store operation. The restricted operation requires
776 // all three dispatches for the superslice.
777 def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
794 // Store operation that requires the whole superslice.
795 def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C,
796 DISP_1C, DISP_1C, DISP_1C],
803 // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
804 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
806 def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C,
807 DISP_1C, DISP_1C, DISP_1C],
814 // 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
815 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
817 def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C,
818 DISP_1C, DISP_1C, DISP_1C],
829 // 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
830 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
832 def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
833 DISP_1C, DISP_1C, DISP_1C],
839 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
840 // and one full superslice for the DIV operation since there is only one DIV
841 // per superslice. Latency of DIV plus ALU is 26.
842 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
843 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
851 // Cracked DIV and ALU operation. Requires one full slice for the ALU operation
852 // and one full superslice for the DIV operation since there is only one DIV
853 // per superslice. Latency of DIV plus ALU is 42.
854 def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
855 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
861 // CR access instructions in IIC_BrMCR, IIC_BrMCRX.
863 // Cracked, restricted, ALU operations.
864 // Here the two ALU ops can actually be done in parallel and therefore the
865 // latencies are not added together. Otherwise this is like having two
866 // instructions running together on two pipelines and 6 dispatches.
867 // ALU ops are 2 cycles each.
868 def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
869 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
877 // Cracked, restricted, ALU operations.
878 // Here the two ALU ops can actually be done in parallel and therefore the
879 // latencies are not added together. Otherwise this is like having two
880 // instructions running together on two pipelines and 6 dispatches.
881 // ALU ops are 3 cycles each. NOTE(review): only 4 DISP_1C are listed, not 6.
882 def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
883 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
888 // FP Div instructions in IIC_FPDivD and IIC_FPDivS.
890 // 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
891 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
896 // 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
897 def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
898 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
903 // 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
904 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
909 // 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
910 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
915 // 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
916 def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
917 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
922 // 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
923 def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C],
928 // 24 Cycle DP Vector Instruction. Takes one full superslice.
929 // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
931 def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
932 DISP_1C, DISP_1C, DISP_1C],
937 // 33 Cycle DP Vector Instruction. Takes one full superslice.
938 // Includes both EXECE, EXECO pipelines and all 3 dispatches for the given
940 def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
941 DISP_1C, DISP_1C, DISP_1C],
946 // Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
948 // Instruction cracked into three pieces. One Load and two ALU operations.
949 // The Load and one of the ALU ops cannot be run at the same time and so the
950 // latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
951 // Both the load and the ALU that depends on it are restricted and so they take
952 // a total of 6 dispatches. The final 2 dispatches come from the second ALU op.
953 // The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
954 def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
955 IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
956 DISP_1C, DISP_1C, DISP_1C, DISP_1C,
957 DISP_1C, DISP_1C, DISP_1C, DISP_1C],
963 // Cracked instruction made up of a Load and an ALU. The ALU does not depend on
964 // the load and so it can be run at the same time as the load. The load is also
965 // restricted. 3 dispatches are from the restricted load while the other two
966 // are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
967 // is required for the ALU.
968 def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
969 DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
975 // Crypto Instructions
977 // 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
978 // superslice. That includes both exec pipelines (EXECO, EXECE) and all three
980 def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C],