1 //===---- X86InstrAMX.td - AMX Instruction Set Extension --*- tablegen -*--===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the instructions that make up the Intel AMX instruction
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
17 let Predicates = [HasAMXTILE, In64BitMode] in {
18 let SchedRW = [WriteSystem] in {
19 let hasSideEffects = 1,
20 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
21 def LDTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
23 [(int_x86_ldtilecfg addr:$src)]>, VEX, T8PS;
24 let hasSideEffects = 1 in
25 def STTILECFG : I <0x49, MRM0m, (outs), (ins opaquemem:$src),
27 [(int_x86_sttilecfg addr:$src)]>, VEX, T8PD;
29 def TILELOADD : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
31 "tileloadd\t{$src, $dst|$dst, $src}", []>,
34 def TILELOADDT1 : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst),
36 "tileloaddt1\t{$src, $dst|$dst, $src}", []>,
// TILERELEASE: opcode 0x49 in the MRM_C0 form, with no explicit inputs or
// outputs. Defs lists TMM0-TMM7, so all eight tile registers are modeled as
// written by this instruction. Its pattern matches int_x86_tilerelease.
38 let Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
39 def TILERELEASE : I<0x49, MRM_C0, (outs), (ins),
40 "tilerelease", [(int_x86_tilerelease)]>, VEX, T8PS;
42 def TILESTORED : I<0x4b, MRMDestMemFSIB, (outs),
43 (ins sibmem:$dst, TILE:$src),
44 "tilestored\t{$src, $dst|$dst, $src}", []>,
46 def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins),
47 "tilezero\t$dst", []>,
50 // Pseudo instructions for RA.
// PLDTILECFGV: pseudo with a single opaque memory operand ($src) and no
// explicit outputs. It is flagged mayLoad and hasSideEffects, and Defs lists
// TMM0-TMM7, so every tile register is modeled as redefined by it. The pattern
// list is empty ([]), so this def supplies no selection pattern of its own.
51 let isPseudo = true, mayLoad = 1, hasSideEffects = 1,
52 Defs = [TMM0,TMM1,TMM2,TMM3,TMM4,TMM5,TMM6,TMM7] in
53 def PLDTILECFGV : PseudoI<(outs), (ins opaquemem:$src), []>;
54 let isPseudo = true, mayLoad = 1 in
55 def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
57 opaquemem:$src3), []>;
58 let isPseudo = true, mayLoad = 1 in
59 def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
61 opaquemem:$src3), []>;
62 let isPseudo = true, mayStore = 1 in
63 def PTILESTOREDV : PseudoI<(outs), (ins GR16:$src1,
64 GR16:$src2, opaquemem:$src3,
66 let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
68 def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
69 [(set TILE:$dst, (int_x86_tilezero_internal
70 GR16:$src1, GR16:$src2))]>;
72 let usesCustomInserter = 1 in {
73 // Pseudo instructions, using immediates instead of tile registers.
74 // To be translated to the actual instructions in X86ISelLowering.cpp
76 def PTILELOADD : PseudoI<(outs), (ins u8imm:$src1, sibmem:$src2), []>;
78 def PTILELOADDT1 : PseudoI<(outs), (ins u8imm:$src1,
81 def PTILESTORED : PseudoI<(outs), (ins i8mem:$dst, u8imm:$src), []>;
// PTILEZERO: pseudo whose only operand is a u8imm ($src) rather than a TILE
// register; it has no explicit outputs. The pattern matches int_x86_tilezero
// with $src as a timm.
82 def PTILEZERO : PseudoI<(outs), (ins u8imm:$src),
83 [(int_x86_tilezero timm:$src)]>;
88 let Predicates = [HasAMXINT8, In64BitMode] in {
89 let SchedRW = [WriteSystem] in {
90 let Constraints = "$src1 = $dst" in {
91 def TDPBSSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
92 (ins TILE:$src1, TILE:$src2, TILE:$src3),
93 "tdpbssd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
95 def TDPBSUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
96 (ins TILE:$src1, TILE:$src2, TILE:$src3),
97 "tdpbsud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
99 def TDPBUSD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
100 (ins TILE:$src1, TILE:$src2, TILE:$src3),
101 "tdpbusd\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
103 def TDPBUUD : I<0x5e, MRMSrcReg4VOp3, (outs TILE:$dst),
104 (ins TILE:$src1, TILE:$src2, TILE:$src3),
105 "tdpbuud\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>,
109 // Pseudo instructions for RA.
110 let isPseudo = true, Constraints = "$src4 = $dst" in {
111 def PTDPBSSDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
112 GR16:$src2, GR16:$src3, TILE:$src4,
113 TILE:$src5, TILE:$src6),
115 (int_x86_tdpbssd_internal GR16:$src1, GR16:$src2,
116 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
117 def PTDPBSUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
118 GR16:$src2, GR16:$src3, TILE:$src4,
119 TILE:$src5, TILE:$src6),
121 (int_x86_tdpbsud_internal GR16:$src1, GR16:$src2,
122 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
123 def PTDPBUSDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
124 GR16:$src2, GR16:$src3, TILE:$src4,
125 TILE:$src5, TILE:$src6),
127 (int_x86_tdpbusd_internal GR16:$src1, GR16:$src2,
128 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
129 def PTDPBUUDV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
130 GR16:$src2, GR16:$src3, TILE:$src4,
131 TILE:$src5, TILE:$src6),
133 (int_x86_tdpbuud_internal GR16:$src1, GR16:$src2,
134 GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
137 let usesCustomInserter = 1 in {
138 // Pseudo instructions, using immediates instead of tile registers.
139 // To be translated to the actual instructions in X86ISelLowering.cpp
// Immediate-operand pseudos for the four int_x86_tdpb{ssd,sud,usd,uud}
// intrinsics. Each takes three u8imm operands ($src1, $src2, $src3), has no
// explicit outputs, and matches its intrinsic with all three operands as timm.
// The four defs differ only in name and in the intrinsic they match.
140 def PTDPBSSD : PseudoI<(outs), (ins u8imm:$src1,
141 u8imm:$src2, u8imm:$src3),
142 [(int_x86_tdpbssd timm:$src1,
143 timm:$src2, timm:$src3)]>;
// Matches int_x86_tdpbsud.
144 def PTDPBSUD : PseudoI<(outs), (ins u8imm:$src1,
145 u8imm:$src2, u8imm:$src3),
146 [(int_x86_tdpbsud timm:$src1,
147 timm:$src2, timm:$src3)]>;
// Matches int_x86_tdpbusd.
148 def PTDPBUSD : PseudoI<(outs), (ins u8imm:$src1,
149 u8imm:$src2, u8imm:$src3),
150 [(int_x86_tdpbusd timm:$src1,
151 timm:$src2, timm:$src3)]>;
// Matches int_x86_tdpbuud.
152 def PTDPBUUD : PseudoI<(outs), (ins u8imm:$src1,
153 u8imm:$src2, u8imm:$src3),
154 [(int_x86_tdpbuud timm:$src1,
155 timm:$src2, timm:$src3)]>;
160 let Predicates = [HasAMXBF16, In64BitMode] in {
161 let SchedRW = [WriteSystem] in {
162 let Constraints = "$src1 = $dst" in
163 def TDPBF16PS : I<0x5c, MRMSrcReg4VOp3, (outs TILE:$dst),
164 (ins TILE:$src1, TILE:$src2, TILE:$src3),
165 "tdpbf16ps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
168 // Pseudo instruction for RA.
169 let isPseudo = true, Constraints = "$src4 = $dst" in
170 def PTDPBF16PSV : PseudoI<(outs TILE: $dst), (ins GR16:$src1,
171 GR16:$src2, GR16:$src3, TILE:$src4,
172 TILE:$src5, TILE:$src6),
174 (int_x86_tdpbf16ps_internal GR16:$src1,
175 GR16:$src2, GR16:$src3, TILE:$src4,
176 TILE:$src5, TILE:$src6))]>;
178 let usesCustomInserter = 1 in {
179 // Pseudo instructions, using immediates instead of tile registers.
180 // To be translated to the actual instructions in X86ISelLowering.cpp
// PTDPBF16PS: pseudo taking three u8imm operands ($src1, $src2, $src3) and
// producing no explicit outputs. The pattern matches int_x86_tdpbf16ps with
// all three operands as timm.
181 def PTDPBF16PS : PseudoI<(outs), (ins u8imm:$src1,
182 u8imm:$src2, u8imm:$src3),
183 [(int_x86_tdpbf16ps timm:$src1,
184 timm:$src2, timm:$src3)]>;
187 } // HasAMXBF16, In64BitMode