1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
// X86VectorVTInfo derives, from an element count, element type and register
// class, the fields used by the instruction multiclasses below: mask register
// classes, value types, memory operands, load fragments, broadcast string,
// CD8 tuple form, sub-register index, execution domain and name suffixes.
// NOTE(review): `suffix` (used for Suffix) and `Size` (used for MemOp,
// SubRegIdx and ZSuffix) are referenced below but their declarations are not
// on any line visible in this listing -- confirm against the original file.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
// Left unset (?) when NumElts is greater than 16.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// Mask value type: one i1 per element.
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 32), 4,
48 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The vector value type named by VTName.
51 ValueType VT = !cast<ValueType>(VTName);
53 string EltTypeName = !cast<string>(EltVT);
54 // Size of the element type in bits, e.g. 32 for v16i32.
// Obtained by stripping the "i"/"f" letters from the element type name.
55 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56 int EltSize = EltVT.Size;
58 // "i" for integer types and "f" for floating-point types
59 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61 // Size of RC in bits, e.g. 512 for VR512.
64 // The corresponding memory operand, e.g. i512mem for VR512.
65 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
66 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67 // FP scalar memory operand for intrinsics - ssmem/sdmem.
// Left unset (?) for element types other than f32/f64.
68 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load fragments looked up by name: full-vector, aligned full-vector,
// scalar element, and broadcast-from-memory (keyed by element size).
72 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
74 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
76 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
77 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
// NOTE(review): the tail of this !if chain (the non-f32/f64 case) is not
// visible in this listing.
79 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
80 !cast<ComplexPattern>("sse_load_f32"),
81 !if (!eq (EltTypeName, "f64"),
82 !cast<ComplexPattern>("sse_load_f64"),
85 // The string to specify embedded broadcast in assembly.
86 string BroadcastStr = "{1to" # NumElts # "}";
88 // 8-bit compressed displacement tuple/subvector format. This is only
89 // defined for NumElts <= 8.
// The !srl test below is true exactly when NumElts < 16; otherwise the
// field is left unset (?).
90 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91 !cast<CD8VForm>("CD8VT" # NumElts), ?);
// sub_xmm for Size 128, sub_ymm for Size 256, unset (?) for any other Size.
93 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94 !if (!eq (Size, 256), sub_ymm, ?));
// NOTE(review): the fallback domain for non-f32/f64 element types is on a
// line not visible in this listing.
96 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// FR32X for f32 elements; FR64X for every other element type.
100 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
// All-zeros vector of type VT, used as the zero-masking passthrough.
102 dag ImmAllZerosV = (VT immAllZerosV);
// Instruction-name suffix selected by Size: "Z128", "Z256", otherwise "Z".
104 string ZSuffix = !if (!eq (Size, 128), "Z128",
105 !if (!eq (Size, 256), "Z256", "Z"));
// Concrete X86VectorVTInfo records. Arguments are
// <element count, element type, register class, mnemonic suffix>.
// Records using register class VR512.
108 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
109 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
112 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
115 // "x" in v32i8x_info means RC = VR256X
116 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
117 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
119 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
120 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
121 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// Records using register class VR128X.
123 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
124 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
125 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
126 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
127 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
128 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
130 // We map scalar types to the smallest (128-bit) vector type
131 // with the appropriate element type. This allows to use the same masking logic.
// Scalar records pass NumElts = 1. The integer ones use GR32/GR64 as the
// register class, the floating-point ones use VR128X.
132 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
133 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
134 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
135 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Groups three X86VectorVTInfo records (exposed as info512, info256 and
// info128) so they can be passed around as a single template argument.
137 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138 X86VectorVTInfo i128> {
139 X86VectorVTInfo info512 = i512;
140 X86VectorVTInfo info256 = i256;
141 X86VectorVTInfo info128 = i128;
// One grouping per element type.
// NOTE(review): the final argument of each def below is on a line that is
// not visible in this listing -- presumably the matching VR128X record.
144 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
146 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
148 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
150 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
152 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
154 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Type information for mask (vNi1) vectors: the mask register class and the
// write-mask register class.
// NOTE(review): the third template parameter (the value type passed by the
// defs below) and any field that stores it are not visible in this listing.
157 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
159 RegisterClass KRC = _krc;
160 RegisterClass KRCWM = _krcwm;
// One record per mask width, passed as <KRC, KRCWM, value type>.
164 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
172 // This multiclass generates the masking variants from the non-masking
173 // variant. It only provides the assembly pieces for the masking variants.
174 // It assumes custom ISel patterns for masking which can be provided as
175 // template arguments.
// Emits three records:
//   NAME    - unmasked form, operands Ins
//   NAME#k  - masked form, operands MaskingIns, asm adds "{${mask}}"
//   NAME#kz - zero-masked form, operands ZeroMaskingIns, asm adds
//             "{${mask}} {z}"
// Each asm string holds the AT&T and Intel operand orders as the two
// alternatives of a "{att|intel}" group.
// NOTE(review): Outs and OpcodeStr are used below but their parameter
// declarations, and the tail of each def (pattern list and trailing
// classes), are on lines not visible in this listing.
176 multiclass AVX512_maskable_custom<bits<8> O, Format F,
178 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
180 string AttSrcAsm, string IntelSrcAsm,
182 list<dag> MaskingPattern,
183 list<dag> ZeroMaskingPattern,
184 string MaskingConstraint = "",
185 bit IsCommutable = 0,
186 bit IsKCommutable = 0,
187 bit IsKZCommutable = IsCommutable> {
// Unmasked form.
188 let isCommutable = IsCommutable in
189 def NAME: AVX512<O, F, Outs, Ins,
190 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
191 "$dst, "#IntelSrcAsm#"}",
194 // Prefer over VMOV*rrk Pat<>
// Masked form; commutability is controlled separately by IsKCommutable.
195 let isCommutable = IsKCommutable in
196 def NAME#k: AVX512<O, F, Outs, MaskingIns,
197 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
198 "$dst {${mask}}, "#IntelSrcAsm#"}",
201 // In case of the 3src subclass this is overridden with a let.
202 string Constraints = MaskingConstraint;
205 // Zero mask does not add any restrictions to commute operands transformation.
206 // So, it is Ok to use IsCommutable instead of IsKCommutable.
// IsKZCommutable defaults to IsCommutable in the parameter list above.
207 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
208 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
209 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
210 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
216 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three pattern lists handed to AVX512_maskable_custom from the
// RHS and MaskingRHS dags:
//   unmasked:    (set _.RC:$dst, RHS)
//   masked:      (set _.RC:$dst, MaskingRHS)
//   zero-masked: Select of RHS against _.ImmAllZerosV under _.KRCWM:$mask
// NOTE(review): the Outs/OpcodeStr parameter declarations and the opening
// line of the zero-masked pattern are not visible in this listing.
217 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
219 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
221 string AttSrcAsm, string IntelSrcAsm,
222 dag RHS, dag MaskingRHS,
223 SDNode Select = vselect,
224 string MaskingConstraint = "",
225 bit IsCommutable = 0,
226 bit IsKCommutable = 0,
227 bit IsKZCommutable = IsCommutable> :
228 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
229 AttSrcAsm, IntelSrcAsm,
230 [(set _.RC:$dst, RHS)],
231 [(set _.RC:$dst, MaskingRHS)],
233 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
234 MaskingConstraint, IsCommutable,
235 IsKCommutable, IsKZCommutable>;
237 // This multiclass generates the unconditional/non-masking, the masking and
238 // the zero-masking variant of the vector instruction. In the masking case, the
239 // preserved vector elements come from a new dummy input operand tied to $dst.
240 // This version uses a separate dag for non-masking and masking.
// RHS is used for the unmasked pattern only; MaskRHS is used inside the
// Select for both the masked and zero-masked patterns.
// NOTE(review): the opening lines of the two masked pattern lists are not
// visible in this listing.
241 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
242 dag Outs, dag Ins, string OpcodeStr,
243 string AttSrcAsm, string IntelSrcAsm,
244 dag RHS, dag MaskRHS,
245 bit IsCommutable = 0, bit IsKCommutable = 0,
246 SDNode Select = vselect> :
247 AVX512_maskable_custom<O, F, Outs, Ins,
// Masked operands: passthrough $src0 and $mask prepended to Ins.
248 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
// Zero-masked operands: only $mask prepended to Ins.
249 !con((ins _.KRCWM:$mask), Ins),
250 OpcodeStr, AttSrcAsm, IntelSrcAsm,
251 [(set _.RC:$dst, RHS)],
253 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
255 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
// The passthrough operand is tied to the destination.
256 "$src0 = $dst", IsCommutable, IsKCommutable>;
258 // This multiclass generates the unconditional/non-masking, the masking and
259 // the zero-masking variant of the vector instruction. In the masking case, the
260 // preserved vector elements come from a new dummy input operand tied to $dst.
// The same RHS dag is used for all three variants; the masked variant wraps
// it in Select against the tied $src0 passthrough.
// NOTE(review): the `dag RHS` parameter declaration and the final argument
// line of the AVX512_maskable_common instantiation are not visible in this
// listing.
261 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
262 dag Outs, dag Ins, string OpcodeStr,
263 string AttSrcAsm, string IntelSrcAsm,
265 bit IsCommutable = 0, bit IsKCommutable = 0,
266 bit IsKZCommutable = IsCommutable,
267 SDNode Select = vselect> :
268 AVX512_maskable_common<O, F, _, Outs, Ins,
// Masked operands: $src0 and $mask prepended to Ins.
269 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
// Zero-masked operands: only $mask prepended to Ins.
270 !con((ins _.KRCWM:$mask), Ins),
271 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
272 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
273 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
276 // This multiclass generates the unconditional/non-masking, the masking and
277 // the zero-masking variant of the scalar instruction.
// Forwards to AVX512_maskable with all three commutable flags set to 0 and
// X86selects as the Select node.
// NOTE(review): the line declaring the RHS parameter is not visible in this
// listing.
278 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
279 dag Outs, dag Ins, string OpcodeStr,
280 string AttSrcAsm, string IntelSrcAsm,
282 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
283 RHS, 0, 0, 0, X86selects>;
285 // Similar to AVX512_maskable but in this case one of the source operands
286 // ($src1) is already tied to $dst so we just use that for the preserved
287 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// NOTE(review): the end of the comment above, and the declarations of the
// RHS and MaskOnly parameters, are on lines not visible in this listing.
289 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
290 dag Outs, dag NonTiedIns, string OpcodeStr,
291 string AttSrcAsm, string IntelSrcAsm,
293 bit IsCommutable = 0,
294 bit IsKCommutable = 0,
295 SDNode Select = vselect,
297 AVX512_maskable_common<O, F, _, Outs,
// Unmasked operands: $src1 prepended to NonTiedIns.
298 !con((ins _.RC:$src1), NonTiedIns),
// Masked and zero-masked operands are identical: $src1 and $mask prepended.
299 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
300 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
301 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// When MaskOnly is set the unmasked pattern is replaced by null_frag.
302 !if(MaskOnly, (null_frag), RHS),
303 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
// An empty masking constraint is passed here.
304 Select, "", IsCommutable, IsKCommutable>;
306 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
307 // operand differs from the output VT. This requires a bitconvert on
308 // the preserved vector going into the vselect.
309 // NOTE: The unmasked pattern is disabled.
// OutVT supplies the result type passed to AVX512_maskable_common; InVT
// supplies the register class of $src1 and the write-mask class of $mask.
310 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
311 X86VectorVTInfo InVT,
312 dag Outs, dag NonTiedIns, string OpcodeStr,
313 string AttSrcAsm, string IntelSrcAsm,
314 dag RHS, bit IsCommutable = 0> :
315 AVX512_maskable_common<O, F, OutVT, Outs,
316 !con((ins InVT.RC:$src1), NonTiedIns),
317 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
318 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// null_frag in the unmasked RHS slot is what disables the unmasked pattern.
319 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
320 (vselect InVT.KRCWM:$mask, RHS,
321 (bitconvert InVT.RC:$src1)),
322 vselect, "", IsCommutable>;
// Scalar counterpart of AVX512_maskable_3src: forwards every argument and
// passes X86selects as the Select node.
// NOTE(review): the declarations of the RHS and MaskOnly parameters are on
// lines not visible in this listing.
324 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
325 dag Outs, dag NonTiedIns, string OpcodeStr,
326 string AttSrcAsm, string IntelSrcAsm,
328 bit IsCommutable = 0,
329 bit IsKCommutable = 0,
331 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
332 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
333 X86selects, MaskOnly>;
// Generates the unmasked, masked and zero-masked records via
// AVX512_maskable_custom, passing Pattern for the unmasked form and empty
// pattern lists ([]) for both masked forms.
// NOTE(review): several parameter lines (Outs, Ins, OpcodeStr, Pattern) and
// the final argument line are not visible in this listing.
335 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
338 string AttSrcAsm, string IntelSrcAsm,
340 AVX512_maskable_custom<O, F, Outs, Ins,
341 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
342 !con((ins _.KRCWM:$mask), Ins),
343 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// Three-source variant of the "in_asm" helper: $src1 is prepended to
// NonTiedIns for all three forms, Pattern is used for the unmasked form and
// both masked forms get empty pattern lists ([]).
// NOTE(review): the OpcodeStr/Pattern parameter lines and the final argument
// line are not visible in this listing.
346 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
347 dag Outs, dag NonTiedIns,
349 string AttSrcAsm, string IntelSrcAsm,
351 AVX512_maskable_custom<O, F, Outs,
352 !con((ins _.RC:$src1), NonTiedIns),
353 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
354 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
355 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
358 // Instruction with mask that puts result in mask register,
359 // like "compare" and "vptest"
// Emits two records under a shared isCommutable setting: NAME (unmasked) and
// NAME#k (masked, asm adds "{${mask}}", tagged EVEX_K). No zero-masked
// record is defined in the lines visible here.
// NOTE(review): the Outs/OpcodeStr/Pattern parameter lines and the tail of
// the NAME def are not visible in this listing.
360 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
362 dag Ins, dag MaskingIns,
364 string AttSrcAsm, string IntelSrcAsm,
366 list<dag> MaskingPattern,
367 bit IsCommutable = 0> {
368 let isCommutable = IsCommutable in {
369 def NAME: AVX512<O, F, Outs, Ins,
370 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
371 "$dst, "#IntelSrcAsm#"}",
374 def NAME#k: AVX512<O, F, Outs, MaskingIns,
375 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
376 "$dst {${mask}}, "#IntelSrcAsm#"}",
377 MaskingPattern>, EVEX_K;
// Wraps AVX512_maskable_custom_cmp, building both pattern lists as a set of
// the mask-register destination (_.KRC:$dst) from RHS and MaskingRHS.
// NOTE(review): the Outs and OpcodeStr parameter lines are not visible in
// this listing.
381 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
383 dag Ins, dag MaskingIns,
385 string AttSrcAsm, string IntelSrcAsm,
386 dag RHS, dag MaskingRHS,
387 bit IsCommutable = 0> :
388 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
389 AttSrcAsm, IntelSrcAsm,
390 [(set _.KRC:$dst, RHS)],
391 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Mask-producing instruction with unmasked and masked variants. The masked
// operand list is $mask prepended to Ins, and the masked pattern is the
// `and` of $mask with RHS_su; the unmasked pattern uses RHS.
393 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
394 dag Outs, dag Ins, string OpcodeStr,
395 string AttSrcAsm, string IntelSrcAsm,
396 dag RHS, dag RHS_su, bit IsCommutable = 0> :
397 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
398 !con((ins _.KRCWM:$mask), Ins),
399 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
400 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
403 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
404 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
405 // swizzled by ExecutionDomainFix to pxor.
406 // We set canFoldAsLoad because this can be converted to a constant-pool
407 // load of an all-zeros value if folding it would be beneficial.
// Both pseudos take no inputs and write VR512; their own patterns match the
// v16i32 all-zeros and all-ones vectors respectively.
408 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
409 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
410 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
411 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
412 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
413 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Route the all-zeros vector of the remaining types listed here to the same
// AVX512_512_SET0 pseudo.
416 let Predicates = [HasAVX512] in {
417 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
418 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
419 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
420 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
421 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
424 // Alias instructions that allow VPTERNLOG to be used with a mask to create
425 // a mix of all ones and all zeros elements. This is done this way to force
426 // the same register to be used as input for all three sources.
// Each pseudo takes only a write-mask operand and matches a vselect between
// an all-ones and an all-zeros vector under that mask.
427 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 16 x i32 variant, mask class VK16WM.
428 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
429 (ins VK16WM:$mask), "",
430 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
431 (v16i32 immAllOnesV),
432 (v16i32 immAllZerosV)))]>;
// 8 x i64 variant, mask class VK8WM.
// NOTE(review): the "true" operand line of this vselect is not visible in
// this listing -- presumably the v8i64 all-ones vector.
433 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
434 (ins VK8WM:$mask), "",
435 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
437 (v8i64 immAllZerosV)))]>;
// All-zeros pseudos for the VR128X and VR256X register classes. Their own
// patterns match the v4i32 and v8i32 all-zeros vectors.
440 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
441 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
442 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
443 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
444 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
445 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Route the all-zeros vector of the remaining types listed here to the
// pseudo of the matching width.
448 let Predicates = [HasAVX512] in {
449 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
450 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
451 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
452 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
453 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
454 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
455 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
456 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
457 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
458 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
461 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
462 // This is expanded by ExpandPostRAPseudos.
// Three input-less pseudos matching the zero immediates fp32imm0, fp64imm0
// and fp128imm0 into FR32X, FR64X and VR128X respectively.
463 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
464 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
465 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
466 [(set FR32X:$dst, fp32imm0)]>;
467 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
468 [(set FR64X:$dst, fp64imm0)]>;
469 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
470 [(set VR128X:$dst, fp128imm0)]>;
473 //===----------------------------------------------------------------------===//
474 // AVX-512 - VECTOR INSERT
477 // Supports two different pattern operators for mask and unmasked ops. Allows
478 // null_frag to be passed for one.
// Defines the register (rr) and memory (rm) forms of an insert of a From
// vector into a To vector at immediate position $src3. The mnemonic is
// "vinsert" # From.EltTypeName # "x" # From.NumElts. vinsert_insert is used
// for the unmasked pattern, vinsert_for_mask for the masked ones.
// NOTE(review): the `X86VectorVTInfo To` parameter line and the last operand
// line of several patterns are not visible in this listing.
479 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
481 SDPatternOperator vinsert_insert,
482 SDPatternOperator vinsert_for_mask,
483 X86FoldableSchedWrite sched> {
484 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Register-source form.
485 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
486 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
487 "vinsert" # From.EltTypeName # "x" # From.NumElts,
488 "$src3, $src2, $src1", "$src1, $src2, $src3",
489 (vinsert_insert:$src3 (To.VT To.RC:$src1),
490 (From.VT From.RC:$src2),
492 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
493 (From.VT From.RC:$src2),
495 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
// Memory-source form: $src2 is loaded with From.LdFrag; the compressed
// displacement is described by From.EltSize and From.CD8TupleForm.
497 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
498 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
499 "vinsert" # From.EltTypeName # "x" # From.NumElts,
500 "$src3, $src2, $src1", "$src1, $src2, $src3",
501 (vinsert_insert:$src3 (To.VT To.RC:$src1),
502 (From.VT (From.LdFrag addr:$src2)),
504 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
505 (From.VT (From.LdFrag addr:$src2)),
506 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
507 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
508 Sched<[sched.Folded, sched.ReadAfterFold]>;
512 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper: instantiates vinsert_for_size_split with vinsert_insert in
// both operator slots.
// NOTE(review): the `X86VectorVTInfo To` parameter line is not visible in
// this listing.
513 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
515 SDPatternOperator vinsert_insert,
516 X86FoldableSchedWrite sched> :
517 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Adds unmasked selection patterns that map vinsert_insert on the given
// From/To types onto the already-defined instructions named InstrStr#"rr"
// and InstrStr#"rm". The instruction immediate is produced by applying the
// INSERT_get_vinsert_imm transform to the matched $ins node. Patterns are
// guarded by the predicate list p.
// NOTE(review): some operand lines of the memory pattern are not visible in
// this listing.
519 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
520 X86VectorVTInfo To, PatFrag vinsert_insert,
521 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
522 let Predicates = p in {
// Register source.
523 def : Pat<(vinsert_insert:$ins
524 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
525 (To.VT (!cast<Instruction>(InstrStr#"rr")
526 To.RC:$src1, From.RC:$src2,
527 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Memory source, loaded with From.LdFrag.
529 def : Pat<(vinsert_insert:$ins
531 (From.VT (From.LdFrag addr:$src2)),
533 (To.VT (!cast<Instruction>(InstrStr#"rm")
534 To.RC:$src1, addr:$src2,
535 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the insert instructions for one element-type family
// (EltVT32/EltVT64) with opcodes Opcode128 (128-bit source) and Opcode256
// (256-bit source). Record names are NAME followed by the suffixes below.
// NOTE(review): the trailing encoding classes of the three *_split
// instantiations are on lines not visible in this listing.
539 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
540 ValueType EltVT64, int Opcode256,
541 X86FoldableSchedWrite sched> {
// 32x4 into a 256-bit register class; needs HasVLX.
543 let Predicates = [HasVLX] in
544 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
545 X86VectorVTInfo< 4, EltVT32, VR128X>,
546 X86VectorVTInfo< 8, EltVT32, VR256X>,
547 vinsert128_insert, sched>, EVEX_V256;
// 32x4 into VR512.
549 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
550 X86VectorVTInfo< 4, EltVT32, VR128X>,
551 X86VectorVTInfo<16, EltVT32, VR512>,
552 vinsert128_insert, sched>, EVEX_V512;
// 64x4 into VR512.
554 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
555 X86VectorVTInfo< 4, EltVT64, VR256X>,
556 X86VectorVTInfo< 8, EltVT64, VR512>,
557 vinsert256_insert, sched>, VEX_W, EVEX_V512;
559 // Even with DQI we'd like to only use these instructions for masking.
// null_frag in the unmasked operator slot means no unmasked pattern.
560 let Predicates = [HasVLX, HasDQI] in
561 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
562 X86VectorVTInfo< 2, EltVT64, VR128X>,
563 X86VectorVTInfo< 4, EltVT64, VR256X>,
564 null_frag, vinsert128_insert, sched>,
567 // Even with DQI we'd like to only use these instructions for masking.
568 let Predicates = [HasDQI] in {
569 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
570 X86VectorVTInfo< 2, EltVT64, VR128X>,
571 X86VectorVTInfo< 8, EltVT64, VR512>,
572 null_frag, vinsert128_insert, sched>,
575 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
576 X86VectorVTInfo< 8, EltVT32, VR256X>,
577 X86VectorVTInfo<16, EltVT32, VR512>,
578 null_frag, vinsert256_insert, sched>,
583 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Floating-point and integer instantiations; the arguments are
// <EltVT32, Opcode128, EltVT64, Opcode256, scheduling class>.
584 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
585 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
587 // Codegen pattern with the alternative types,
588 // Even with AVX512DQ we'll still use these for unmasked operations.
// 64-bit element types are lowered through the 32x4 instructions and 32-bit
// element types through the 64x4 instructions.
589 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
590 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
591 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
592 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
594 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
595 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
596 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
597 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
599 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
600 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
601 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
602 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
604 // Codegen pattern with the alternative types insert VEC128 into VEC256
605 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
606 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
607 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
608 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
609 // Codegen pattern with the alternative types insert VEC128 into VEC512
610 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
611 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
612 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
613 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
614 // Codegen pattern with the alternative types insert VEC256 into VEC512
615 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
616 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
617 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
618 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Masked selection patterns for inserts whose From/To types differ from the
// Cast type that supplies the mask class (Cast.KRCWM) and the passthrough
// register class (Cast.RC). Four patterns are emitted, targeting
// InstrStr#"rrk", "rmk", "rrkz" and "rmkz"; the k forms take a $src0
// passthrough operand, the kz forms do not.
// NOTE(review): many lines of this multiclass (the predicate-list parameter,
// each `def : Pat<` opener, and several pattern operands) are not visible in
// this listing; the summary above covers only what is shown.
621 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
622 X86VectorVTInfo To, X86VectorVTInfo Cast,
623 PatFrag vinsert_insert,
624 SDNodeXForm INSERT_get_vinsert_imm,
626 let Predicates = p in {
// Register source, masked with passthrough.
628 (vselect Cast.KRCWM:$mask,
630 (vinsert_insert:$ins (To.VT To.RC:$src1),
631 (From.VT From.RC:$src2),
634 (!cast<Instruction>(InstrStr#"rrk")
635 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
636 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Memory source, masked with passthrough.
638 (vselect Cast.KRCWM:$mask,
640 (vinsert_insert:$ins (To.VT To.RC:$src1),
643 (From.LdFrag addr:$src2))),
646 (!cast<Instruction>(InstrStr#"rmk")
647 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
648 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Register source, zero-masked.
651 (vselect Cast.KRCWM:$mask,
653 (vinsert_insert:$ins (To.VT To.RC:$src1),
654 (From.VT From.RC:$src2),
657 (!cast<Instruction>(InstrStr#"rrkz")
658 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Memory source, zero-masked.
661 (vselect Cast.KRCWM:$mask,
663 (vinsert_insert:$ins (To.VT To.RC:$src1),
664 (From.VT (From.LdFrag addr:$src2)),
667 (!cast<Instruction>(InstrStr#"rmkz")
668 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
669 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked-insert cast patterns for the 256-bit destination records. The
// arguments are <instruction name, From, To, Cast, insert operator,
// immediate transform, predicates>; the Cast record determines the mask
// granularity, so its element size matches the instruction named.
673 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
674 v8f32x_info, vinsert128_insert,
675 INSERT_get_vinsert128_imm, [HasVLX]>;
676 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
677 v4f64x_info, vinsert128_insert,
678 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Integer types cast to v8i32 (32-bit mask granularity).
680 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
681 v8i32x_info, vinsert128_insert,
682 INSERT_get_vinsert128_imm, [HasVLX]>;
683 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
684 v8i32x_info, vinsert128_insert,
685 INSERT_get_vinsert128_imm, [HasVLX]>;
686 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
687 v8i32x_info, vinsert128_insert,
688 INSERT_get_vinsert128_imm, [HasVLX]>;
// Integer types cast to v4i64 (64-bit mask granularity) for a 256-bit
// destination. These use the integer-domain VINSERTI64x2Z256 instruction, in
// line with the 512-bit integer entries in this file, which use
// VINSERTI64x2Z. Previously these three entries named the floating-point
// VINSERTF64x2Z256, selecting an FP-domain instruction for integer vectors.
689 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v4i32x_info, v8i32x_info,
690 v4i64x_info, vinsert128_insert,
691 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
692 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v8i16x_info, v16i16x_info,
693 v4i64x_info, vinsert128_insert,
694 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
695 defm : vinsert_for_mask_cast<"VINSERTI64x2Z256", v16i8x_info, v32i8x_info,
696 v4i64x_info, vinsert128_insert,
697 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// Masked-insert cast patterns for the 512-bit destination records. The
// arguments are <instruction name, From, To, Cast, insert operator,
// immediate transform, predicates>.
// 128-bit inserts, floating point.
699 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
700 v16f32_info, vinsert128_insert,
701 INSERT_get_vinsert128_imm, [HasAVX512]>;
702 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
703 v8f64_info, vinsert128_insert,
704 INSERT_get_vinsert128_imm, [HasDQI]>;
// 128-bit inserts, integer.
706 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
707 v16i32_info, vinsert128_insert,
708 INSERT_get_vinsert128_imm, [HasAVX512]>;
709 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
710 v16i32_info, vinsert128_insert,
711 INSERT_get_vinsert128_imm, [HasAVX512]>;
712 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
713 v16i32_info, vinsert128_insert,
714 INSERT_get_vinsert128_imm, [HasAVX512]>;
715 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
716 v8i64_info, vinsert128_insert,
717 INSERT_get_vinsert128_imm, [HasDQI]>;
718 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
719 v8i64_info, vinsert128_insert,
720 INSERT_get_vinsert128_imm, [HasDQI]>;
721 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
722 v8i64_info, vinsert128_insert,
723 INSERT_get_vinsert128_imm, [HasDQI]>;
// 256-bit inserts, floating point.
725 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
726 v16f32_info, vinsert256_insert,
727 INSERT_get_vinsert256_imm, [HasDQI]>;
728 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
729 v8f64_info, vinsert256_insert,
730 INSERT_get_vinsert256_imm, [HasAVX512]>;
// 256-bit inserts, integer.
732 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
733 v16i32_info, vinsert256_insert,
734 INSERT_get_vinsert256_imm, [HasDQI]>;
735 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
736 v16i32_info, vinsert256_insert,
737 INSERT_get_vinsert256_imm, [HasDQI]>;
738 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
739 v16i32_info, vinsert256_insert,
740 INSERT_get_vinsert256_imm, [HasDQI]>;
741 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
742 v8i64_info, vinsert256_insert,
743 INSERT_get_vinsert256_imm, [HasAVX512]>;
744 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
745 v8i64_info, vinsert256_insert,
746 INSERT_get_vinsert256_imm, [HasAVX512]>;
747 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
748 v8i64_info, vinsert256_insert,
749 INSERT_get_vinsert256_imm, [HasAVX512]>;
751 // vinsertps - insert f32 to XMM
// Register and memory forms, opcode 0x21, both matching X86insertps.
752 let ExeDomain = SSEPackedSingle in {
// The brace-less `let` applies isCommutable only to the rr def that follows.
753 let isCommutable = 1 in
754 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
755 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
756 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
757 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
758 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the second source is an f32 load widened with
// scalar_to_vector to v4f32.
// NOTE(review): the line closing this pattern (the immediate operand) is not
// visible in this listing.
759 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
760 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
761 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
762 [(set VR128X:$dst, (X86insertps VR128X:$src1,
763 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
765 EVEX_4V, EVEX_CD8<32, CD8VT1>,
766 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
769 //===----------------------------------------------------------------------===//
770 // AVX-512 VECTOR EXTRACT
773 // Supports two different pattern operators for mask and unmasked ops. Allows
774 // null_frag to be passed for one.
// Defines three forms of an extract of a To-sized piece from a From vector
// at immediate $idx: rr (register destination, with masked variants through
// AVX512_maskable_split), mr (store to memory) and mrk (masked store). The
// mnemonic is "vextract" # To.EltTypeName # "x" # To.NumElts.
775 multiclass vextract_for_size_split<int Opcode,
776 X86VectorVTInfo From, X86VectorVTInfo To,
777 SDPatternOperator vextract_extract,
778 SDPatternOperator vextract_for_mask,
779 SchedWrite SchedRR, SchedWrite SchedMR> {
781 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Register form: vextract_extract for the unmasked pattern,
// vextract_for_mask for the masked ones.
782 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
783 (ins From.RC:$src1, u8imm:$idx),
784 "vextract" # To.EltTypeName # "x" # To.NumElts,
785 "$idx, $src1", "$src1, $idx",
786 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
787 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
788 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// Store form: the pattern stores the extracted To.VT value.
// NOTE(review): the tail of this def (store address and trailing classes) is
// not visible in this listing.
790 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
791 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
792 "vextract" # To.EltTypeName # "x" # To.NumElts #
793 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
794 [(store (To.VT (vextract_extract:$idx
795 (From.VT From.RC:$src1), (iPTR imm))),
// Masked store form: no selection pattern ([]), so mayStore is set
// explicitly; marked NotMemoryFoldable.
799 let mayStore = 1, hasSideEffects = 0 in
800 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
801 (ins To.MemOp:$dst, To.KRCWM:$mask,
802 From.RC:$src1, u8imm:$idx),
803 "vextract" # To.EltTypeName # "x" # To.NumElts #
804 "\t{$idx, $src1, $dst {${mask}}|"
805 "$dst {${mask}}, $src1, $idx}", []>,
806 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
810 // Passes the same pattern operator for masked and unmasked ops.
// Thin wrapper: forwards every argument to vextract_for_size_split and supplies
// vextract_extract for both operator parameters.
811 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
813 SDPatternOperator vextract_extract,
814 SchedWrite SchedRR, SchedWrite SchedMR> :
815 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
817 // Codegen pattern for the alternative types
// Emits two anonymous patterns guarded by Predicates = p:
//   1. vextract_extract of a From.VT register  -> InstrStr#"rr"
//   2. a store of that extract to addr:$dst    -> InstrStr#"mr"
// In both, the instruction immediate is produced by applying the
// EXTRACT_get_vextract_imm transform to the matched $ext node.
818 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
819 X86VectorVTInfo To, PatFrag vextract_extract,
820 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
821 let Predicates = p in {
822 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
823 (To.VT (!cast<Instruction>(InstrStr#"rr")
825 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
826 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
827 (iPTR imm))), addr:$dst),
828 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
829 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates the vextract instruction family for one pair of element types.
//   EltVT32 / Opcode128 - element type and opcode for the 32x4 and 64x2 forms
//                         (both use Opcode128 and vextract128_extract).
//   EltVT64 / Opcode256 - element type and opcode for the 64x4 and 32x8 forms
//                         (both use Opcode256 and vextract256_extract).
// Variants and their predicates:
//   32x4Z, 64x4Z   - HasAVX512
//   32x4Z256       - HasVLX
//   64x2Z256       - HasVLX, HasDQI
//   64x2Z, 32x8Z   - HasDQI
// The DQI variants go through vextract_for_size_split with null_frag as the
// first operator, so only the second (mask) operator carries a real fragment.
833 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
834 ValueType EltVT64, int Opcode256,
835 SchedWrite SchedRR, SchedWrite SchedMR> {
836 let Predicates = [HasAVX512] in {
837 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
838 X86VectorVTInfo<16, EltVT32, VR512>,
839 X86VectorVTInfo< 4, EltVT32, VR128X>,
840 vextract128_extract, SchedRR, SchedMR>,
841 EVEX_V512, EVEX_CD8<32, CD8VT4>;
842 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
843 X86VectorVTInfo< 8, EltVT64, VR512>,
844 X86VectorVTInfo< 4, EltVT64, VR256X>,
845 vextract256_extract, SchedRR, SchedMR>,
846 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
848 let Predicates = [HasVLX] in
849 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
850 X86VectorVTInfo< 8, EltVT32, VR256X>,
851 X86VectorVTInfo< 4, EltVT32, VR128X>,
852 vextract128_extract, SchedRR, SchedMR>,
853 EVEX_V256, EVEX_CD8<32, CD8VT4>;
855 // Even with DQI we'd like to only use these instructions for masking.
856 let Predicates = [HasVLX, HasDQI] in
857 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
858 X86VectorVTInfo< 4, EltVT64, VR256X>,
859 X86VectorVTInfo< 2, EltVT64, VR128X>,
860 null_frag, vextract128_extract, SchedRR, SchedMR>,
861 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
863 // Even with DQI we'd like to only use these instructions for masking.
864 let Predicates = [HasDQI] in {
865 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
866 X86VectorVTInfo< 8, EltVT64, VR512>,
867 X86VectorVTInfo< 2, EltVT64, VR128X>,
868 null_frag, vextract128_extract, SchedRR, SchedMR>,
869 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
870 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
871 X86VectorVTInfo<16, EltVT32, VR512>,
872 X86VectorVTInfo< 8, EltVT32, VR256X>,
873 null_frag, vextract256_extract, SchedRR, SchedMR>,
874 EVEX_V512, EVEX_CD8<32, CD8VT8>;
878 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
// Floating-point (f32/f64, opcodes 0x19/0x1b) and integer (i32/i64, opcodes
// 0x39/0x3b) instantiations of vextract_for_type.
879 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
880 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
882 // extract_subvector codegen patterns with the alternative types.
883 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each defm below maps an extract whose element type differs from the
// instruction's nominal one (e.g. v8f64 -> v2f64 via VEXTRACTF32x4Z) onto an
// existing 32x4 or 64x4 instruction of the same vector width.
884 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
885 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
886 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
887 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
889 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
890 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
891 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
892 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
894 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
895 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
896 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
897 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
899 // Codegen pattern with the alternative types extract VEC128 from VEC256
900 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
901 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
902 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
903 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
905 // Codegen pattern with the alternative types extract VEC128 from VEC512
906 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
907 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
908 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
909 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
910 // Codegen pattern with the alternative types extract VEC256 from VEC512
911 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
912 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
913 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
914 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
917 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
918 // smaller extract to enable EVEX->VEX.
// NoVLX variant: each pattern takes the sub_ymm subregister of the 512-bit
// source and feeds it to VEXTRACTI128rr (integer types) or VEXTRACTF128rr
// (floating-point types). The matched extract_subvector index is the element
// offset of the second 128-bit lane for each element type (2, 4, 8 or 16).
919 let Predicates = [NoVLX] in {
920 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
921 (v2i64 (VEXTRACTI128rr
922 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
924 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
925 (v2f64 (VEXTRACTF128rr
926 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
928 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
929 (v4i32 (VEXTRACTI128rr
930 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
932 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
933 (v4f32 (VEXTRACTF128rr
934 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
936 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
937 (v8i16 (VEXTRACTI128rr
938 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
940 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
941 (v16i8 (VEXTRACTI128rr
942 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
946 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
947 // smaller extract to enable EVEX->VEX.
// HasVLX variant: same source patterns as the NoVLX block, but the sub_ymm
// subregister is fed to the 256-bit instructions VEXTRACTI32x4Z256rr (integer
// types) or VEXTRACTF32x4Z256rr (floating-point types).
948 let Predicates = [HasVLX] in {
949 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
950 (v2i64 (VEXTRACTI32x4Z256rr
951 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
953 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
954 (v2f64 (VEXTRACTF32x4Z256rr
955 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
957 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
958 (v4i32 (VEXTRACTI32x4Z256rr
959 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
961 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
962 (v4f32 (VEXTRACTF32x4Z256rr
963 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
965 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
966 (v8i16 (VEXTRACTI32x4Z256rr
967 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
969 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
970 (v16i8 (VEXTRACTI32x4Z256rr
971 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
976 // Additional patterns for handling a bitcast between the vselect and the
977 // extract_subvector.
// Parameters: From/To describe the extract as it appears in the DAG; Cast is
// the type of the surrounding vselect and supplies the mask class (Cast.KRCWM)
// and the result type. Two patterns are emitted under Predicates = p:
//   - the first selects InstrStr#"rrk", passing Cast.RC:$src0, the mask, the
//     source and the transformed immediate;
//   - the second selects InstrStr#"rrkz", passing only the mask, the source
//     and the transformed immediate.
978 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
979 X86VectorVTInfo To, X86VectorVTInfo Cast,
980 PatFrag vextract_extract,
981 SDNodeXForm EXTRACT_get_vextract_imm,
983 let Predicates = p in {
984 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
986 (To.VT (vextract_extract:$ext
987 (From.VT From.RC:$src), (iPTR imm)))),
989 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
990 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
991 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
993 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
995 (To.VT (vextract_extract:$ext
996 (From.VT From.RC:$src), (iPTR imm)))),
998 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
999 Cast.KRCWM:$mask, From.RC:$src,
1000 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Instantiations of vextract_for_mask_cast. In each, the instruction named in
// the first argument has the element width of the Cast type (third info
// argument), while From/To give the type the extract was written in. The
// 64x2 and 32x8 instructions are guarded by HasDQI; the 256-bit sources add
// HasVLX.
//
// 128-bit extracts from 256-bit vectors.
1004 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1005 v4f32x_info, vextract128_extract,
1006 EXTRACT_get_vextract128_imm, [HasVLX]>;
1007 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1008 v2f64x_info, vextract128_extract,
1009 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1011 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1012 v4i32x_info, vextract128_extract,
1013 EXTRACT_get_vextract128_imm, [HasVLX]>;
1014 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1015 v4i32x_info, vextract128_extract,
1016 EXTRACT_get_vextract128_imm, [HasVLX]>;
1017 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1018 v4i32x_info, vextract128_extract,
1019 EXTRACT_get_vextract128_imm, [HasVLX]>;
1020 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1021 v2i64x_info, vextract128_extract,
1022 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1023 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1024 v2i64x_info, vextract128_extract,
1025 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1026 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1027 v2i64x_info, vextract128_extract,
1028 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
// 128-bit extracts from 512-bit vectors.
1030 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1031 v4f32x_info, vextract128_extract,
1032 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1033 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1034 v2f64x_info, vextract128_extract,
1035 EXTRACT_get_vextract128_imm, [HasDQI]>;
1037 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1038 v4i32x_info, vextract128_extract,
1039 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1040 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1041 v4i32x_info, vextract128_extract,
1042 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1043 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1044 v4i32x_info, vextract128_extract,
1045 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1046 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1047 v2i64x_info, vextract128_extract,
1048 EXTRACT_get_vextract128_imm, [HasDQI]>;
1049 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1050 v2i64x_info, vextract128_extract,
1051 EXTRACT_get_vextract128_imm, [HasDQI]>;
1052 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1053 v2i64x_info, vextract128_extract,
1054 EXTRACT_get_vextract128_imm, [HasDQI]>;
// 256-bit extracts from 512-bit vectors.
1056 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1057 v8f32x_info, vextract256_extract,
1058 EXTRACT_get_vextract256_imm, [HasDQI]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1060 v4f64x_info, vextract256_extract,
1061 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1063 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1064 v8i32x_info, vextract256_extract,
1065 EXTRACT_get_vextract256_imm, [HasDQI]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1067 v8i32x_info, vextract256_extract,
1068 EXTRACT_get_vextract256_imm, [HasDQI]>;
1069 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1070 v8i32x_info, vextract256_extract,
1071 EXTRACT_get_vextract256_imm, [HasDQI]>;
1072 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1073 v4i64x_info, vextract256_extract,
1074 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1075 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1076 v4i64x_info, vextract256_extract,
1077 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1078 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1079 v4i64x_info, vextract256_extract,
1080 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1082 // vextractps - extract 32 bits from XMM
// Register form: the pattern is an extractelt from the v4f32 source viewed as
// v4i32 (bc_v4i32), with the element index as an immediate, written to a GR32.
1083 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1084 (ins VR128X:$src1, u8imm:$src2),
1085 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1086 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1087 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
// Memory form: same extractelt expression, but the pattern is a store and the
// destination is an f32mem operand.
1089 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1090 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1091 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1092 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1094 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1096 //===---------------------------------------------------------------------===//
1097 // AVX-512 BROADCAST
1099 // broadcast with a scalar argument.
// Emits patterns only (no instruction definitions appear in this multiclass).
// An X86VBroadcast of a scalar in SrcInfo.FRC is selected to the instruction
// named Name # DestInfo.ZSuffix # r, after the scalar is moved into SrcInfo.RC
// with COPY_TO_REGCLASS. The two vselect patterns do the same for the rk form
// (pass-through DestInfo.RC:$src0) and the rkz form (DestInfo.ImmAllZerosV).
1100 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1102 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1103 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1104 (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1105 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1106 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1107 (X86VBroadcast SrcInfo.FRC:$src),
1108 DestInfo.RC:$src0)),
1109 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1110 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1111 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1112 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1113 (X86VBroadcast SrcInfo.FRC:$src),
1114 DestInfo.ImmAllZerosV)),
1115 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1116 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1119 // Split version to allow mask and broadcast node to be different types. This
1120 // helps support the 32x2 broadcasts.
// Parameters used in the lines below:
//   MaskInfo  - destination register class, write-mask class and the zero
//               vector of the masked patterns.
//   DestInfo  - execution domain of every instruction defined here.
//   SrcInfo   - source register class, scalar memory operand, element size for
//               EVEX_CD8 and the broadcast-load fragment.
//   UnmaskedOp / UnmaskedBcastOp - operators of the unmasked r and m patterns;
//               the masked forms use X86VBroadcast and SrcInfo.BroadcastLdFrag
//               directly.
//   IsConvertibleToThreeAddress - applied to the mk definition.
// Defines r, rkz, rk (register source) and m, mkz, mk (memory source).
1121 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1123 SchedWrite SchedRR, SchedWrite SchedRM,
1124 X86VectorVTInfo MaskInfo,
1125 X86VectorVTInfo DestInfo,
1126 X86VectorVTInfo SrcInfo,
1127 bit IsConvertibleToThreeAddress,
1128 SDPatternOperator UnmaskedOp = X86VBroadcast,
1129 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
// Unmasked register-source broadcast.
1130 let hasSideEffects = 0 in
1131 def r : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1132 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1133 [(set MaskInfo.RC:$dst,
1137 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1138 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
// Zero-masked register-source broadcast ({z} in the asm string, EVEX_KZ).
1139 def rkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1140 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1141 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1142 "${dst} {${mask}} {z}, $src}"),
1143 [(set MaskInfo.RC:$dst,
1144 (vselect MaskInfo.KRCWM:$mask,
1148 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1149 MaskInfo.ImmAllZerosV))],
1150 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
// Merge-masked register-source broadcast; $src0 is tied to $dst.
1151 let Constraints = "$src0 = $dst" in
1152 def rk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1153 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1155 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1156 "${dst} {${mask}}, $src}"),
1157 [(set MaskInfo.RC:$dst,
1158 (vselect MaskInfo.KRCWM:$mask,
1162 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1163 MaskInfo.RC:$src0))],
1164 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
// Unmasked memory-source broadcast from a scalar memory operand.
1166 let hasSideEffects = 0, mayLoad = 1 in
1167 def m : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1168 (ins SrcInfo.ScalarMemOp:$src),
1169 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1170 [(set MaskInfo.RC:$dst,
1174 (UnmaskedBcastOp addr:$src)))))],
1175 DestInfo.ExeDomain>, T8PD, EVEX,
1176 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Zero-masked memory-source broadcast.
1178 def mkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1179 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1180 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1181 "${dst} {${mask}} {z}, $src}"),
1182 [(set MaskInfo.RC:$dst,
1183 (vselect MaskInfo.KRCWM:$mask,
1187 (SrcInfo.BroadcastLdFrag addr:$src)))),
1188 MaskInfo.ImmAllZerosV))],
1189 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1190 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
// Merge-masked memory-source broadcast; $src0 is tied to $dst and the
// three-address convertibility flag comes from the template argument.
1192 let Constraints = "$src0 = $dst",
1193 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1194 def mk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1195 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1196 SrcInfo.ScalarMemOp:$src),
1197 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1198 "${dst} {${mask}}, $src}"),
1199 [(set MaskInfo.RC:$dst,
1200 (vselect MaskInfo.KRCWM:$mask,
1204 (SrcInfo.BroadcastLdFrag addr:$src)))),
1205 MaskInfo.RC:$src0))],
1206 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1207 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1210 // Helper class to force mask and broadcast result to same type.
// Forwards to avx512_broadcast_rm_split, passing DestInfo for both the
// MaskInfo and DestInfo parameters and leaving the two operator parameters at
// their defaults.
1211 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1212 SchedWrite SchedRR, SchedWrite SchedRM,
1213 X86VectorVTInfo DestInfo,
1214 X86VectorVTInfo SrcInfo,
1215 bit IsConvertibleToThreeAddress> :
1216 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1217 DestInfo, DestInfo, SrcInfo,
1218 IsConvertibleToThreeAddress>;
// Floating-point scalar broadcast used for vbroadcastsd. Defines Z under
// HasAVX512 (destination _.info512) and Z256 under HasVLX (destination
// _.info256); both read from _.info128 and combine avx512_broadcast_rm with the
// pattern-only avx512_broadcast_scalar.
1220 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1221 AVX512VLVectorVTInfo _> {
1222 let Predicates = [HasAVX512] in {
1223 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1225 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1230 let Predicates = [HasVLX] in {
1231 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1232 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1233 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
// Floating-point scalar broadcast used for vbroadcastss. Same layout as
// avx512_fp_broadcast_sd, plus a Z128 variant whose destination is _.info128.
// All three variants use the WriteFShuffle256 / WriteFShuffle256Ld writes.
1239 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1240 AVX512VLVectorVTInfo _> {
1241 let Predicates = [HasAVX512] in {
1242 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1243 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1244 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1249 let Predicates = [HasVLX] in {
1250 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1251 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1252 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1255 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1256 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1257 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
// Instantiations: vbroadcastss uses opcode 0x18, vbroadcastsd uses opcode 0x19
// with avx512vl_f64_info and VEX_W1X.
1262 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1264 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1265 avx512vl_f64_info>, VEX_W1X;
// Integer broadcast from a scalar register of class SrcRC. Defines a single
// maskable instruction r whose mnemonic is "vpbroadcast" followed by _.Suffix
// and whose pattern is OpNode applied to SrcRC:$src.
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268 X86VectorVTInfo _, SDPatternOperator OpNode,
1269 RegisterClass SrcRC> {
1270 let ExeDomain = _.ExeDomain in
1271 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1273 "vpbroadcast"##_.Suffix, "$src", "$src",
1274 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
// Integer broadcast where the instruction always takes a GR32 operand even
// though the scalar being broadcast lives in SrcRC. The instruction r is
// defined through AVX512_maskable_custom with empty pattern lists; selection
// is done by the three patterns below, which place SrcRC:$src into the Subreg
// subregister of an IMPLICIT_DEF i32 via INSERT_SUBREG and pass the result to
// Name#r, Name#rk (merge with _.RC:$src0) or Name#rkz (zero vector).
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279 X86VectorVTInfo _, SDPatternOperator OpNode,
1280 RegisterClass SrcRC, SubRegIndex Subreg> {
1281 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283 (outs _.RC:$dst), (ins GR32:$src),
1284 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
// Unmasked selection.
1289 def : Pat <(_.VT (OpNode SrcRC:$src)),
1290 (!cast<Instruction>(Name#r)
1291 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Merge-masked selection.
1293 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Zero-masked selection.
1297 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Vector-length wrapper around avx512_int_broadcastbw_reg: Z is guarded by
// prd alone, Z256 and Z128 by prd plus HasVLX. Name is extended with the same
// suffix as the defm so the inner patterns can refer to the generated
// instruction. Z and Z256 use WriteShuffle256; Z128 uses WriteShuffle.
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305 let Predicates = [prd] in
1306 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307 OpNode, SrcRC, Subreg>, EVEX_V512;
1308 let Predicates = [prd, HasVLX] in {
1309 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
// Vector-length wrapper around avx512_int_broadcast_reg: Z is guarded by prd
// alone, Z256 and Z128 by prd plus HasVLX. Z and Z256 use WriteShuffle256;
// Z128 uses WriteShuffle.
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317 SDPatternOperator OpNode,
1318 RegisterClass SrcRC, Predicate prd> {
1319 let Predicates = [prd] in
1320 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1322 let Predicates = [prd, HasVLX] in {
1323 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1325 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
// Register-source vpbroadcast instantiations. The byte form (GR8 via sub_8bit)
// and word form (GR16 via sub_16bit) go through the GR32-operand wrapper; the
// dword and qword forms take GR32 / GR64 directly. D and Q share opcode 0x7C
// and Q adds VEX_W.
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336 X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338 X86VBroadcast, GR64, HasAVX512>, VEX_W;
// Vector-length wrapper around avx512_broadcast_rm for the integer broadcasts
// with a vector-register or memory source. Z is guarded by prd; Z256 and Z128
// by prd plus HasVLX. Every variant reads from _.info128 and forwards
// IsConvertibleToThreeAddress. Z128 uses WriteShuffle / WriteShuffleXLd, the
// wider variants use WriteShuffle256 / WriteShuffle256Ld.
1340 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1341 AVX512VLVectorVTInfo _, Predicate prd,
1342 bit IsConvertibleToThreeAddress> {
1343 let Predicates = [prd] in {
1344 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1345 WriteShuffle256Ld, _.info512, _.info128,
1346 IsConvertibleToThreeAddress>,
1349 let Predicates = [prd, HasVLX] in {
1350 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1351 WriteShuffle256Ld, _.info256, _.info128,
1352 IsConvertibleToThreeAddress>,
1354 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1355 WriteShuffleXLd, _.info128, _.info128,
1356 IsConvertibleToThreeAddress>,
// vpbroadcastb/w/d/q instantiations. The byte and word forms require HasBWI
// and pass 0 for IsConvertibleToThreeAddress; the dword and qword forms require
// HasAVX512 and pass 1. The qword form adds VEX_W1X.
1361 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1362 avx512vl_i8_info, HasBWI, 0>;
1363 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1364 avx512vl_i16_info, HasBWI, 0>;
1365 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1366 avx512vl_i32_info, HasAVX512, 1>;
1367 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1368 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
// Subvector broadcast from memory. Defines one maskable instruction rm whose
// pattern is X86SubVBroadcast of a _Src.VT value loaded with _Src.LdFrag,
// producing _Dst.VT. Scheduled with SchedWriteShuffle.YMM.Folded.
1370 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1371 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1372 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1373 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1374 (_Dst.VT (X86SubVBroadcast
1375 (_Src.VT (_Src.LdFrag addr:$src))))>,
1376 Sched<[SchedWriteShuffle.YMM.Folded]>,
1380 // This should be used for the AVX512DQ broadcast instructions. It disables
1381 // the unmasked patterns so that we only use the DQ instructions when masking
// Same operands as avx512_subvec_broadcast_rm, but built on
// AVX512_maskable_split and explicitly marked mayLoad = 1, hasSideEffects = 0.
1383 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1384 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1385 let hasSideEffects = 0, mayLoad = 1 in
1386 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1389 (_Dst.VT (X86SubVBroadcast
1390 (_Src.VT (_Src.LdFrag addr:$src))))>,
1391 Sched<[SchedWriteShuffle.YMM.Folded]>,
// Extra 512-bit broadcast-from-memory patterns: a v8i64 broadcast of an
// X86vzload64 selects VPBROADCASTQZm, and a v16i32 broadcast of a loadi32
// selects VPBROADCASTDZm.
1395 let Predicates = [HasAVX512] in {
1396 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1397 def : Pat<(v8i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1398 (VPBROADCASTQZm addr:$src)>;
1400 // FIXME this is to handle aligned extloads from i8.
1401 def : Pat<(v16i32 (X86VBroadcast (loadi32 addr:$src))),
1402 (VPBROADCASTDZm addr:$src)>;
// 128- and 256-bit counterparts of the HasAVX512 broadcast-from-memory
// patterns: X86vzload64 sources select VPBROADCASTQZ128m / VPBROADCASTQZ256m,
// loadi32 sources select VPBROADCASTDZ128m / VPBROADCASTDZ256m.
1405 let Predicates = [HasVLX] in {
1406 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1407 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1408 (VPBROADCASTQZ128m addr:$src)>;
1409 def : Pat<(v4i64 (X86VBroadcast (v2i64 (X86vzload64 addr:$src)))),
1410 (VPBROADCASTQZ256m addr:$src)>;
1412 // FIXME this is to handle aligned extloads from i8.
1413 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
1414 (VPBROADCASTDZ128m addr:$src)>;
1415 def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
1416 (VPBROADCASTDZ256m addr:$src)>;
// Word broadcasts from memory for 128- and 256-bit destinations. Four source
// shapes are matched for each width: a truncated plain i32 load, a truncated
// extloadi16, a truncated zextloadi16, and a loadi16. All select
// VPBROADCASTWZ128m or VPBROADCASTWZ256m on the same address.
1418 let Predicates = [HasVLX, HasBWI] in {
1419 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1420 // This means we'll encounter truncated i32 loads; match that here.
1421 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1422 (VPBROADCASTWZ128m addr:$src)>;
1423 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1424 (VPBROADCASTWZ256m addr:$src)>;
1425 def : Pat<(v8i16 (X86VBroadcast
1426 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1427 (VPBROADCASTWZ128m addr:$src)>;
1428 def : Pat<(v8i16 (X86VBroadcast
1429 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1430 (VPBROADCASTWZ128m addr:$src)>;
1431 def : Pat<(v16i16 (X86VBroadcast
1432 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1433 (VPBROADCASTWZ256m addr:$src)>;
1434 def : Pat<(v16i16 (X86VBroadcast
1435 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1436 (VPBROADCASTWZ256m addr:$src)>;
1438 // FIXME this is to handle aligned extloads from i8.
1439 def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
1440 (VPBROADCASTWZ128m addr:$src)>;
1441 def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
1442 (VPBROADCASTWZ256m addr:$src)>;
// 512-bit counterpart of the HasVLX+HasBWI word-broadcast patterns: the same
// four source shapes, all selecting VPBROADCASTWZm.
1444 let Predicates = [HasBWI] in {
1445 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1446 // This means we'll encounter truncated i32 loads; match that here.
1447 def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1448 (VPBROADCASTWZm addr:$src)>;
1449 def : Pat<(v32i16 (X86VBroadcast
1450 (i16 (trunc (i32 (extloadi16 addr:$src)))))),
1451 (VPBROADCASTWZm addr:$src)>;
1452 def : Pat<(v32i16 (X86VBroadcast
1453 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1454 (VPBROADCASTWZm addr:$src)>;
1456 // FIXME this is to handle aligned extloads from i8.
1457 def : Pat<(v32i16 (X86VBroadcast (loadi16 addr:$src))),
1458 (VPBROADCASTWZm addr:$src)>;
1461 //===----------------------------------------------------------------------===//
1462 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcasts from memory: 32x4 forms replicate a 128-bit
// source (v4i32 / v4f32), 64x4 forms replicate a 256-bit source (v4i64 / v4f64)
// and add VEX_W.
1465 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1466 v16i32_info, v4i32x_info>,
1467 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1468 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1469 v16f32_info, v4f32x_info>,
1470 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1471 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1472 v8i64_info, v4i64x_info>, VEX_W,
1473 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1474 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1475 v8f64_info, v4f64x_info>, VEX_W,
1476 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Additional 512-bit subvector-broadcast patterns, in four groups:
//   1. 256-bit loads of other element types mapped onto the 64X4 instructions;
//   2. register-source fallbacks built from INSERT_SUBREG + VINSERT*64x4Zrr;
//   3. 128-bit loads of other element types mapped onto the 32X4 instructions;
//   4. masked selects where a bitcast sits between the vselect and the
//      broadcast.
1478 let Predicates = [HasAVX512] in {
1479 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1480 (VBROADCASTF64X4rm addr:$src)>;
1481 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1482 (VBROADCASTI64X4rm addr:$src)>;
1483 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1484 (VBROADCASTI64X4rm addr:$src)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1486 (VBROADCASTI64X4rm addr:$src)>;
1488 // Provide fallback in case the load node that is used in the patterns above
1489 // is used by additional users, which prevents the pattern selection.
// Each fallback places $src in the sub_ymm half of an IMPLICIT_DEF 512-bit
// value and then inserts $src again at index 1.
1490 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1491 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1492 (v4f64 VR256X:$src), 1)>;
1493 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1494 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1495 (v8f32 VR256X:$src), 1)>;
1496 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1497 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1498 (v4i64 VR256X:$src), 1)>;
1499 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1500 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1501 (v8i32 VR256X:$src), 1)>;
1502 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1503 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1504 (v16i16 VR256X:$src), 1)>;
1505 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1506 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1507 (v32i8 VR256X:$src), 1)>;
// 128-bit loads of 64-bit, 16-bit and 8-bit element vectors reuse the 32X4
// broadcast instructions.
1509 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1510 (VBROADCASTF32X4rm addr:$src)>;
1511 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1512 (VBROADCASTI32X4rm addr:$src)>;
1513 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1514 (VBROADCASTI32X4rm addr:$src)>;
1515 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1516 (VBROADCASTI32X4rm addr:$src)>;
1518 // Patterns for selects of bitcasted operations.
// Pairs of rmkz (zero vector as the false operand) and rmk (VR512:$src0 passed
// to the instruction) patterns, first with a 16-bit mask for the 32X4 forms,
// then with an 8-bit mask for the 64X4 forms.
1519 def : Pat<(vselect VK16WM:$mask,
1520 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1521 (v16f32 immAllZerosV)),
1522 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1523 def : Pat<(vselect VK16WM:$mask,
1524 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1526 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1527 def : Pat<(vselect VK16WM:$mask,
1528 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1529 (v16i32 immAllZerosV)),
1530 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1531 def : Pat<(vselect VK16WM:$mask,
1532 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1534 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1536 def : Pat<(vselect VK8WM:$mask,
1537 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1538 (v8f64 immAllZerosV)),
1539 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1540 def : Pat<(vselect VK8WM:$mask,
1541 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1543 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1544 def : Pat<(vselect VK8WM:$mask,
1545 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1546 (v8i64 immAllZerosV)),
1547 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1548 def : Pat<(vselect VK8WM:$mask,
1549 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1551 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// HasVLX group: 256-bit vbroadcasti32x4 / vbroadcastf32x4 definitions plus the
// selection patterns that reuse them. The group contains, in order:
//   - the two defm instantiations (EVEX_V256, EVEX_CD8<32, CD8VT4>);
//   - patterns mapping X86SubVBroadcast loads of other element types
//     (v2f64, v2i64, v8i16, v16i8) onto the rm forms;
//   - masked-select patterns for bitcasted broadcasts (rmkz / rmk);
//   - register-source fallbacks built from VINSERT{F,I}32x4Z256rr.
// NOTE(review): the embedded numbering skips in several places (for example
// 1577 -> 1579 and 1609 -> 1612), so the false operand of the rmk selects and
// the closing brace of this let block are not visible in this excerpt.
1554 let Predicates = [HasVLX] in {
1555 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1556 v8i32x_info, v4i32x_info>,
1557 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1558 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1559 v8f32x_info, v4f32x_info>,
1560 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// Loads of 128-bit vectors with other element types reuse the 32X4 rm forms.
1562 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1563 (VBROADCASTF32X4Z256rm addr:$src)>;
1564 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1565 (VBROADCASTI32X4Z256rm addr:$src)>;
1566 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1567 (VBROADCASTI32X4Z256rm addr:$src)>;
1568 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1569 (VBROADCASTI32X4Z256rm addr:$src)>;
1571 // Patterns for selects of bitcasted operations.
1572 def : Pat<(vselect VK8WM:$mask,
1573 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1574 (v8f32 immAllZerosV)),
1575 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1576 def : Pat<(vselect VK8WM:$mask,
1577 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1579 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1580 def : Pat<(vselect VK8WM:$mask,
1581 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1582 (v8i32 immAllZerosV)),
1583 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1584 def : Pat<(vselect VK8WM:$mask,
1585 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1587 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1590 // Provide fallback in case the load node that is used in the patterns above
1591 // is used by additional users, which prevents the pattern selection.
// Each fallback places $src in the sub_xmm subregister of an IMPLICIT_DEF
// 256-bit value and then inserts $src again with immediate 1.
1592 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1593 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1594 (v2f64 VR128X:$src), 1)>;
1595 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1596 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1597 (v4f32 VR128X:$src), 1)>;
1598 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1599 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1600 (v2i64 VR128X:$src), 1)>;
1601 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1602 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1603 (v4i32 VR128X:$src), 1)>;
1604 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1605 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1606 (v8i16 VR128X:$src), 1)>;
1607 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1608 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1609 (v16i8 VR128X:$src), 1)>;
// HasVLX + HasDQI group: vbroadcasti64x2 / vbroadcastf64x2 on v4i64 / v4f64
// destinations, followed by masked-select patterns (VK4WM) for broadcasts that
// were bitcast from the 32-bit element type.
// NOTE(review): the record names end in Z128 although both definitions carry
// EVEX_V256 and the patterns use VR256X:$src0. Other code may refer to these
// names, so confirm before renaming.
// NOTE(review): the embedded numbering skips (for example 1626 -> 1628 and
// 1636 -> 1639), so the false operand of the rmk selects and the closing brace
// are not visible in this excerpt.
1612 let Predicates = [HasVLX, HasDQI] in {
1613 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1614 v4i64x_info, v2i64x_info>, VEX_W1X,
1615 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1616 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1617 v4f64x_info, v2f64x_info>, VEX_W1X,
1618 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1620 // Patterns for selects of bitcasted operations.
1621 def : Pat<(vselect VK4WM:$mask,
1622 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1623 (v4f64 immAllZerosV)),
1624 (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1625 def : Pat<(vselect VK4WM:$mask,
1626 (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1628 (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
1629 def : Pat<(vselect VK4WM:$mask,
1630 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1631 (v4i64 immAllZerosV)),
1632 (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
1633 def : Pat<(vselect VK4WM:$mask,
1634 (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1636 (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// HasDQI group: 512-bit vbroadcast{i,f}64x2 and vbroadcast{i,f}32x8, followed
// by masked-select patterns for bitcasted broadcasts. The VK16WM patterns pick
// the 32X8 forms (256-bit loads) and the VK8WM patterns pick the 64X2 forms
// (128-bit loads). Selects against immAllZerosV use rmkz, the others use rmk
// with VR512:$src0.
// NOTE(review): the embedded numbering skips inside every rmk pattern (for
// example 1659 -> 1661) and after the last pattern (1686 -> 1689), so the false
// operand of those selects and the closing brace are not visible here.
1639 let Predicates = [HasDQI] in {
1640 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1641 v8i64_info, v2i64x_info>, VEX_W,
1642 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1643 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1644 v16i32_info, v8i32x_info>,
1645 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1646 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1647 v8f64_info, v2f64x_info>, VEX_W,
1648 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1649 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1650 v16f32_info, v8f32x_info>,
1651 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1653 // Patterns for selects of bitcasted operations.
1654 def : Pat<(vselect VK16WM:$mask,
1655 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1656 (v16f32 immAllZerosV)),
1657 (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
1658 def : Pat<(vselect VK16WM:$mask,
1659 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
1661 (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1662 def : Pat<(vselect VK16WM:$mask,
1663 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1664 (v16i32 immAllZerosV)),
1665 (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
1666 def : Pat<(vselect VK16WM:$mask,
1667 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
1669 (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1671 def : Pat<(vselect VK8WM:$mask,
1672 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1673 (v8f64 immAllZerosV)),
1674 (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
1675 def : Pat<(vselect VK8WM:$mask,
1676 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
1678 (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1679 def : Pat<(vselect VK8WM:$mask,
1680 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1681 (v8i64 immAllZerosV)),
1682 (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
1683 def : Pat<(vselect VK8WM:$mask,
1684 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
1686 (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// Instantiates a 32x2 broadcast at 512 bits (Z, requires HasDQI) and 256 bits
// (Z256, requires HasDQI and HasVLX) through avx512_broadcast_rm_split. Each
// instantiation passes the matching _Dst and _Src info records, _Src.info128,
// a literal 0 and null_frag for the last two arguments; both use the
// WriteShuffle256 / WriteShuffle256Ld scheduling classes.
// NOTE(review): both defm statements end in a comma and the embedded numbering
// skips after them (1694 -> 1696, 1699 -> 1703), so the trailing mix-in classes
// and the closing brace are not visible in this excerpt.
1689 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1690 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1691 let Predicates = [HasDQI] in
1692 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1693 WriteShuffle256Ld, _Dst.info512,
1694 _Src.info512, _Src.info128, 0, null_frag, null_frag>,
1696 let Predicates = [HasDQI, HasVLX] in
1697 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
1698 WriteShuffle256Ld, _Dst.info256,
1699 _Src.info256, _Src.info128, 0, null_frag, null_frag>,
// Extends avx512_common_broadcast_32x2 with a 128-bit form (Z128, requires
// HasDQI and HasVLX) that uses the WriteShuffle / WriteShuffleXLd scheduling
// classes and the info128 records for destination and source.
// NOTE(review): the defm ends in a comma and the embedded numbering skips
// after it (1710 -> 1714), so the trailing mix-in classes and the closing
// brace are not visible in this excerpt.
1703 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1704 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1705 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1707 let Predicates = [HasDQI, HasVLX] in
1708 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
1709 WriteShuffleXLd, _Dst.info128,
1710 _Src.info128, _Src.info128, 0, null_frag, null_frag>,
// The integer form is built from avx512_common_broadcast_i32x2, which adds a
// Z128 variant; the floating-point form is built from the base multiclass
// avx512_common_broadcast_32x2, which only defines Z and Z256.
1714 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1715 avx512vl_i32_info, avx512vl_i64_info>;
1716 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1717 avx512vl_f32_info, avx512vl_f64_info>;
1719 //===----------------------------------------------------------------------===//
1720 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Defines the single register form (rr) of a mask-to-vector broadcast: the
// input is a mask register of class KRC and the result, of type _.VT in
// register class _.RC, is the X86VBroadcastm node applied to that mask.
//   opc        - opcode byte.
//   OpcodeStr  - assembly mnemonic.
//   _          - vector type info of the destination.
//   KRC        - register class of the mask source operand.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 1727 -> 1730).
1722 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1723 X86VectorVTInfo _, RegisterClass KRC> {
1724 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1725 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1726 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
1727 EVEX, Sched<[WriteShuffle]>;
// Instantiates avx512_mask_broadcastm at all three vector widths: Z (512-bit)
// requires HasCDI; Z256 and Z128 additionally require HasVLX. The same mask
// register class KRC is used for every width.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1736 -> 1740).
1730 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1731 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1732 let Predicates = [HasCDI] in
1733 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1734 let Predicates = [HasCDI, HasVLX] in {
1735 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1736 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// vpbroadcastmw2d reads a VK16 mask and produces i32 vectors; vpbroadcastmb2q
// reads a VK8 mask, produces i64 vectors and is additionally tagged VEX_W.
1740 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1741 avx512vl_i32_info, VK16>;
1742 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1743 avx512vl_i64_info, VK8>, VEX_W;
1745 //===----------------------------------------------------------------------===//
1746 // -- VPERMI2 - 3 source operands form --
// Register (rr) and full-vector memory (rm) forms of the VPERMI2 family.
// $src1 is tied to $dst through the Constraints string and appears in the
// X86VPermt2 node as the index operand (IdxVT.RC:$src1); $src2 and $src3 are
// the two data operands of type _.VT. The rm form loads $src3 with _.LdFrag.
//   sched  - scheduling class; the rm form uses sched.Folded and
//            sched.ReadAfterFold.
//   _      - vector type info for the data operands and the result.
//   IdxVT  - vector type info for the index operand.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1756 -> 1759 and 1764 -> 1768).
1747 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1748 X86FoldableSchedWrite sched,
1749 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1750 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1751 hasSideEffects = 0 in {
1752 defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1753 (ins _.RC:$src2, _.RC:$src3),
1754 OpcodeStr, "$src3, $src2", "$src2, $src3",
1755 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
1756 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1759 defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1760 (ins _.RC:$src2, _.MemOp:$src3),
1761 OpcodeStr, "$src3, $src2", "$src2, $src3",
1762 (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
1763 (_.VT (_.LdFrag addr:$src3)))), 1>,
1764 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of the VPERMI2 family. It has the same operand
// roles as avx512_perm_i ($src1 tied to $dst and used as the index), but $src3
// is a scalar memory operand loaded through _.BroadcastLdFrag, the assembly
// string appends _.BroadcastStr to it, and the encoding adds EVEX_B.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 1780 -> 1783).
1768 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1769 X86FoldableSchedWrite sched,
1770 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1771 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1772 hasSideEffects = 0, mayLoad = 1 in
1773 defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1774 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1775 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1776 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1777 (_.VT (X86VPermt2 _.RC:$src2,
1778 IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1779 AVX5128IBase, EVEX_4V, EVEX_B,
1780 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_perm_i together with avx512_perm_i_mb at 512, 128 and
// 256 bits. The 512-bit records use NAME unchanged and carry no predicate of
// their own here; the 128-bit and 256-bit records append 128 / 256 to NAME and
// require HasVLX.
//   VTInfo       - per-width type info for the data operands.
//   ShuffleMask  - per-width type info for the index operand.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1799 -> 1803).
1783 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1784 X86FoldableSchedWrite sched,
1785 AVX512VLVectorVTInfo VTInfo,
1786 AVX512VLVectorVTInfo ShuffleMask> {
1787 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1788 ShuffleMask.info512>,
1789 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
1790 ShuffleMask.info512>, EVEX_V512;
1791 let Predicates = [HasVLX] in {
1792 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1793 ShuffleMask.info128>,
1794 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
1795 ShuffleMask.info128>, EVEX_V128;
1796 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1797 ShuffleMask.info256>,
1798 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
1799 ShuffleMask.info256>, EVEX_V256;
// Variant of avx512_perm_i_sizes that only instantiates avx512_perm_i (no
// broadcast-memory form) and gates every width on the predicate Prd; the
// 128-bit and 256-bit records additionally require HasVLX.
// NOTE(review): the line that declares Prd and closes the parameter list is
// not visible in this excerpt (embedded numbering skips 1806 -> 1808), and
// neither are the closing braces (1815 -> 1819).
1803 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1804 X86FoldableSchedWrite sched,
1805 AVX512VLVectorVTInfo VTInfo,
1806 AVX512VLVectorVTInfo Idx,
1808 let Predicates = [Prd] in
1809 defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
1810 Idx.info512>, EVEX_V512;
1811 let Predicates = [Prd, HasVLX] in {
1812 defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
1813 Idx.info128>, EVEX_V128;
1814 defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
1815 Idx.info256>, EVEX_V256;
// VPERMI2 instantiations. The D/Q/PS/PD forms go through avx512_perm_i_sizes;
// the W and B forms go through avx512_perm_i_sizes_bw, gated on HasBWI and
// HasVBMI respectively. The floating-point forms take their index type from
// the integer info record of the same element width.
// NOTE(review): the VPERMI2B defm ends in a comma and the embedded numbering
// skips after it (1827 -> 1829), so its trailing mix-in classes are not
// visible in this excerpt.
1819 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
1820 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1821 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
1822 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1823 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
1824 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1825 VEX_W, EVEX_CD8<16, CD8VF>;
1826 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
1827 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1829 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
1830 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1831 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
1832 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1834 // Extra patterns to deal with extra bitcasts due to passthru and index being
1835 // different types on the fp versions.
// Each pattern matches a masked X86VPermt2 in which both the index operand and
// the select pass-through are bitconverts of the same $src1 value viewed as
// CastVT.VT, and selects the masked instruction named by InstrStr with the
// suffix rrk (register $src3), rmk (_.LdFrag load) or rmbk
// (_.BroadcastLdFrag load).
//   InstrStr - base record name of the instruction to select.
//   _        - type info of the data operands and the result.
//   IdxVT    - type info the index is converted to.
//   CastVT   - type info $src1 is converted from.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 1858 -> 1861).
1836 multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1837 X86VectorVTInfo IdxVT,
1838 X86VectorVTInfo CastVT> {
1839 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1840 (X86VPermt2 (_.VT _.RC:$src2),
1841 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
1842 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1843 (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1844 _.RC:$src2, _.RC:$src3)>;
1845 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1846 (X86VPermt2 _.RC:$src2,
1847 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1848 (_.LdFrag addr:$src3)),
1849 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1850 (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1851 _.RC:$src2, addr:$src3)>;
1852 def : Pat<(_.VT (vselect _.KRCWM:$mask,
1853 (X86VPermt2 _.RC:$src2,
1854 (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1855 (_.BroadcastLdFrag addr:$src3)),
1856 (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1857 (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1858 _.RC:$src2, addr:$src3)>;
// Only the vpermi2ps forms are covered here, each with a cast from the vXi64
// type of the same total width.
1861 // TODO: Should we add more casts? The vXi64 case is common due to ABI.
1862 defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
1863 defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
1864 defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// Register (rr) and full-vector memory (rm) forms of the VPERMT2 family.
// $src1 is tied to $dst through the Constraints string and is the first data
// operand of X86VPermt2; $src2 (register class IdxVT.RC) is the index operand
// and $src3 is the second data operand, loaded with _.LdFrag in the rm form.
//   sched  - scheduling class; the rm form uses sched.Folded and
//            sched.ReadAfterFold.
//   _      - vector type info for the data operands and the result.
//   IdxVT  - vector type info for the index operand.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1875 -> 1877 and 1882 -> 1885).
1867 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1868 X86FoldableSchedWrite sched,
1869 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1870 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1871 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1872 (ins IdxVT.RC:$src2, _.RC:$src3),
1873 OpcodeStr, "$src3, $src2", "$src2, $src3",
1874 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1875 EVEX_4V, AVX5128IBase, Sched<[sched]>;
1877 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1878 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1879 OpcodeStr, "$src3, $src2", "$src2, $src3",
1880 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1881 (_.LdFrag addr:$src3))), 1>,
1882 EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) form of the VPERMT2 family. Operand roles match
// avx512_perm_t ($src1 tied to $dst, $src2 is the index), but $src3 is a
// scalar memory operand loaded through _.BroadcastLdFrag, the assembly string
// appends _.BroadcastStr to it, and the encoding adds EVEX_B.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 1896 -> 1899).
1885 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1886 X86FoldableSchedWrite sched,
1887 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1888 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1889 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1890 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1891 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1892 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1893 (_.VT (X86VPermt2 _.RC:$src1,
1894 IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
1895 AVX5128IBase, EVEX_4V, EVEX_B,
1896 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_perm_t together with avx512_perm_t_mb at 512, 128 and
// 256 bits. The 512-bit records use NAME unchanged and carry no predicate of
// their own here; the 128-bit and 256-bit records append 128 / 256 to NAME and
// require HasVLX.
//   VTInfo       - per-width type info for the data operands.
//   ShuffleMask  - per-width type info for the index operand.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1915 -> 1919).
1899 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1900 X86FoldableSchedWrite sched,
1901 AVX512VLVectorVTInfo VTInfo,
1902 AVX512VLVectorVTInfo ShuffleMask> {
1903 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1904 ShuffleMask.info512>,
1905 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
1906 ShuffleMask.info512>, EVEX_V512;
1907 let Predicates = [HasVLX] in {
1908 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1909 ShuffleMask.info128>,
1910 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
1911 ShuffleMask.info128>, EVEX_V128;
1912 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1913 ShuffleMask.info256>,
1914 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
1915 ShuffleMask.info256>, EVEX_V256;
// Variant of avx512_perm_t_sizes that only instantiates avx512_perm_t (no
// broadcast-memory form) and gates every width on the predicate Prd; the
// 128-bit and 256-bit records additionally require HasVLX.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1930 -> 1934).
1919 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1920 X86FoldableSchedWrite sched,
1921 AVX512VLVectorVTInfo VTInfo,
1922 AVX512VLVectorVTInfo Idx, Predicate Prd> {
1923 let Predicates = [Prd] in
1924 defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
1925 Idx.info512>, EVEX_V512;
1926 let Predicates = [Prd, HasVLX] in {
1927 defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
1928 Idx.info128>, EVEX_V128;
1929 defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
1930 Idx.info256>, EVEX_V256;
// VPERMT2 instantiations. The D/Q/PS/PD forms go through avx512_perm_t_sizes;
// the W and B forms go through avx512_perm_t_sizes_bw, gated on HasBWI and
// HasVBMI respectively. The floating-point forms take their index type from
// the integer info record of the same element width.
// NOTE(review): the VPERMT2B defm ends in a comma and the embedded numbering
// skips after it (1942 -> 1944), so its trailing mix-in classes are not
// visible in this excerpt.
1934 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
1935 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1936 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
1937 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1938 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
1939 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1940 VEX_W, EVEX_CD8<16, CD8VF>;
1941 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
1942 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1944 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
1945 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1946 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
1947 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1949 //===----------------------------------------------------------------------===//
1950 // AVX-512 - BLEND using mask
// Defines the register and full-vector memory forms of a mask blend:
//   rr / rm      - unmasked;
//   rrk / rmk    - with a write mask (EVEX_K);
//   rrkz / rmkz  - with a zeroing write mask (EVEX_KZ), marked
//                  NotMemoryFoldable.
// Every record has an empty pattern list, so none of them is selected through
// a pattern attached here. All are declared with hasSideEffects = 0 and the
// memory forms with mayLoad = 1.
// NOTE(review): the multiclass name looks like the result of a text
// substitution (a scheduling-class-like prefix on a blend-mask multiclass).
// It is referenced by blendmask_dq and blendmask_bw, so it is left unchanged.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 1989 -> 1993).
1953 multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
1954 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1955 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1956 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1957 (ins _.RC:$src1, _.RC:$src2),
1958 !strconcat(OpcodeStr,
1959 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
1960 EVEX_4V, Sched<[sched]>;
1961 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1962 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1963 !strconcat(OpcodeStr,
1964 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1965 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
1966 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1967 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1968 !strconcat(OpcodeStr,
1969 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1970 []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
1971 let mayLoad = 1 in {
1972 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1973 (ins _.RC:$src1, _.MemOp:$src2),
1974 !strconcat(OpcodeStr,
1975 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1976 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1977 Sched<[sched.Folded, sched.ReadAfterFold]>;
1978 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1979 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1980 !strconcat(OpcodeStr,
1981 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1982 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1983 Sched<[sched.Folded, sched.ReadAfterFold]>;
1984 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1985 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1986 !strconcat(OpcodeStr,
1987 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1988 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1989 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
// Broadcast-memory forms of the mask blend: rmbk (write mask), rmbkz (zeroing
// write mask, marked NotMemoryFoldable) and rmb (unmasked). $src2 is a scalar
// memory operand, the assembly strings append _.BroadcastStr to it, and every
// record carries EVEX_B. All records have empty pattern lists and are declared
// with mayLoad = 1 and hasSideEffects = 0.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 2018 -> 2022).
1993 multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
1994 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
1995 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
1996 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1997 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1998 !strconcat(OpcodeStr,
1999 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2000 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2001 EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2002 Sched<[sched.Folded, sched.ReadAfterFold]>;
2004 def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2005 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
2006 !strconcat(OpcodeStr,
2007 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
2008 "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2009 EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2010 Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
2012 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
2013 (ins _.RC:$src1, _.ScalarMemOp:$src2),
2014 !strconcat(OpcodeStr,
2015 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2016 "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
2017 EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
2018 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates both WriteFVarBlendask and WriteFVarBlendask_rmb at 512 bits
// (Z, no predicate of its own here) and at 256 / 128 bits (Z256 / Z128,
// requires HasVLX), choosing sched.ZMM, sched.YMM and sched.XMM respectively.
// NOTE(review): every defm ends in a comma and the embedded numbering skips
// after each one (2025 -> 2028, 2030 -> 2032, 2033 -> 2038), so the trailing
// mix-in classes and the closing braces are not visible in this excerpt.
2022 multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2023 AVX512VLVectorVTInfo VTInfo> {
2024 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2025 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2028 let Predicates = [HasVLX] in {
2029 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2030 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2032 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
2033 WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Instantiates WriteFVarBlendask only (no broadcast-memory forms) at 512 bits
// (Z, requires HasBWI) and at 256 / 128 bits (Z256 / Z128, requires HasBWI and
// HasVLX).
// NOTE(review): every defm ends in a comma and the embedded numbering skips
// after each one (2041 -> 2044, 2045 -> 2047, 2047 -> 2052), so the trailing
// mix-in classes and the closing braces are not visible in this excerpt.
2038 multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
2039 AVX512VLVectorVTInfo VTInfo> {
2040 let Predicates = [HasBWI] in
2041 defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
2044 let Predicates = [HasBWI, HasVLX] in {
2045 defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
2047 defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
// Blend-with-mask instantiations. The PS/PD/D/Q forms use blendmask_dq and the
// B/W forms use blendmask_bw; floating-point forms use SchedWriteFVarBlend and
// integer forms use SchedWriteVarBlend.
// NOTE(review): the type-info argument line of the VBLENDMPS, VPBLENDMD and
// VPBLENDMB instantiations is not visible in this excerpt (embedded numbering
// skips 2052 -> 2054, 2056 -> 2058 and 2060 -> 2062).
2052 defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
2054 defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
2055 avx512vl_f64_info>, VEX_W;
2056 defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
2058 defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
2059 avx512vl_i64_info>, VEX_W;
2060 defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
2062 defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
2063 avx512vl_i16_info>, VEX_W;
2065 //===----------------------------------------------------------------------===//
2066 // Compare Instructions
2067 //===----------------------------------------------------------------------===//
2069 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Defines the scalar compare-into-mask forms, all with opcode 0xC2:
//   rr_Int / rm_Int - maskable intrinsic forms on _.VT operands; OpNode is the
//                     unmasked pattern and OpNode_su the one passed for the
//                     masked variant.
//   rrb_Int         - the {sae} form (EVEX_B), matched with OpNodeSAE and
//                     OpNodeSAE_su and declared with Uses = [MXCSR].
//   rr / rm         - isCodeGenOnly forms on _.FRC operands that write _.KRC;
//                     rr is marked isCommutable.
// NOTE(review): many lines of this multiclass are not visible in this excerpt
// (the embedded numbering skips, for example 2074 -> 2076, 2088 -> 2090 and
// 2112 -> 2115), including output operand lists, mnemonic arguments, the
// condition-code operands of several patterns and the closing braces. Read the
// full definition before changing anything here.
2071 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
2072 PatFrag OpNode_su, PatFrag OpNodeSAE_su,
2073 X86FoldableSchedWrite sched> {
2074 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2076 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2078 "$cc, $src2, $src1", "$src1, $src2, $cc",
2079 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2080 (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2081 timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2083 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2085 (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
2087 "$cc, $src2, $src1", "$src1, $src2, $cc",
2088 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2090 (OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
2091 timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2092 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
2094 let Uses = [MXCSR] in
2095 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2097 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2099 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
2100 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2102 (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2104 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
2106 let isCodeGenOnly = 1 in {
2107 let isCommutable = 1 in
2108 def rr : AVX512Ii8<0xC2, MRMSrcReg,
2109 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
2110 !strconcat("vcmp", _.Suffix,
2111 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2112 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2115 EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
2116 def rm : AVX512Ii8<0xC2, MRMSrcMem,
2118 (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2119 !strconcat("vcmp", _.Suffix,
2120 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2121 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
2122 (_.ScalarLdFrag addr:$src2),
2124 EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
2125 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Single-use variants of X86cmpms and X86cmpmsSAE: each fragment matches the
// underlying node only when the predicate N->hasOneUse() holds.
// NOTE(review): the line closing each predicate block is not visible in this
// excerpt (embedded numbering skips 2131 -> 2133 and 2135 -> 2138).
2129 def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2130 (X86cmpms node:$src1, node:$src2, node:$cc), [{
2131 return N->hasOneUse();
2133 def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2134 (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
2135 return N->hasOneUse();
// Scalar compare instantiations under HasAVX512: VCMPSSZ uses f32x_info in the
// SSEPackedSingle domain with AVX512XSIi8Base; VCMPSDZ uses f64x_info in the
// SSEPackedDouble domain with AVX512XDIi8Base and VEX_W. Both use
// SchedWriteFCmp.Scl and the X86cmpms / X86cmpmsSAE nodes with their
// single-use fragments.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 2146 -> 2149).
2138 let Predicates = [HasAVX512] in {
2139 let ExeDomain = SSEPackedSingle in
2140 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
2141 X86cmpms_su, X86cmpmsSAE_su,
2142 SchedWriteFCmp.Scl>, AVX512XSIi8Base;
2143 let ExeDomain = SSEPackedDouble in
2144 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
2145 X86cmpms_su, X86cmpmsSAE_su,
2146 SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
// Packed integer compare that writes a mask register (_.KRC). Defines four
// records, all with empty pattern lists and hasSideEffects = 0:
//   rr  / rm   - unmasked register and memory forms;
//   rrk / rmk  - forms that take a write mask (_.KRCWM:$mask, EVEX_K).
// The register forms take their isCommutable flag from IsCommutable; the
// memory forms are declared with mayLoad = 1.
// NOTE(review): the closing brace is not visible in this excerpt (embedded
// numbering skips 2173 -> 2176).
2149 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
2150 X86FoldableSchedWrite sched,
2151 X86VectorVTInfo _, bit IsCommutable> {
2152 let isCommutable = IsCommutable, hasSideEffects = 0 in
2153 def rr : AVX512BI<opc, MRMSrcReg,
2154 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2155 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2156 []>, EVEX_4V, Sched<[sched]>;
2157 let mayLoad = 1, hasSideEffects = 0 in
2158 def rm : AVX512BI<opc, MRMSrcMem,
2159 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2160 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2161 []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2162 let isCommutable = IsCommutable, hasSideEffects = 0 in
2163 def rrk : AVX512BI<opc, MRMSrcReg,
2164 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2165 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2166 "$dst {${mask}}, $src1, $src2}"),
2167 []>, EVEX_4V, EVEX_K, Sched<[sched]>;
2168 let mayLoad = 1, hasSideEffects = 0 in
2169 def rmk : AVX512BI<opc, MRMSrcMem,
2170 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2171 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2172 "$dst {${mask}}, $src1, $src2}"),
2173 []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_icmp_packed with the broadcast-memory forms rmb (unmasked)
// and rmbk (write mask, EVEX_K). Both take a scalar memory operand, append
// _.BroadcastStr to it in the assembly string, carry EVEX_B, have empty
// pattern lists and are declared with mayLoad = 1 and hasSideEffects = 0.
// NOTE(review): the line that declares IsCommutable and ends the parameter
// list is not visible in this excerpt (embedded numbering skips 2177 -> 2179),
// and neither are the closing braces (2193 -> 2197).
2176 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
2177 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2179 avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
2180 let mayLoad = 1, hasSideEffects = 0 in {
2181 def rmb : AVX512BI<opc, MRMSrcMem,
2182 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2183 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2184 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2185 []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2186 def rmbk : AVX512BI<opc, MRMSrcMem,
2187 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2188 _.ScalarMemOp:$src2),
2189 !strconcat(OpcodeStr,
2190 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2191 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2192 []>, EVEX_4V, EVEX_K, EVEX_B,
2193 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_icmp_packed at 512 bits (Z, requires prd) and at 256 /
// 128 bits (Z256 / Z128, requires prd and HasVLX), using sched.ZMM, sched.YMM
// and sched.XMM respectively. IsCommutable defaults to 0 and is forwarded
// unchanged to every width.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 2209 -> 2213).
2197 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
2198 X86SchedWriteWidths sched,
2199 AVX512VLVectorVTInfo VTInfo, Predicate prd,
2200 bit IsCommutable = 0> {
2201 let Predicates = [prd] in
2202 defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
2203 VTInfo.info512, IsCommutable>, EVEX_V512;
2205 let Predicates = [prd, HasVLX] in {
2206 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
2207 VTInfo.info256, IsCommutable>, EVEX_V256;
2208 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
2209 VTInfo.info128, IsCommutable>, EVEX_V128;
// Same width fan-out as avx512_icmp_packed_vl, but built on
// avx512_icmp_packed_rmb so that every width also gets the broadcast-memory
// forms. Z requires prd; Z256 and Z128 require prd and HasVLX.
// NOTE(review): the closing braces are not visible in this excerpt (embedded
// numbering skips 2225 -> 2229).
2213 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2214 X86SchedWriteWidths sched,
2215 AVX512VLVectorVTInfo VTInfo,
2216 Predicate prd, bit IsCommutable = 0> {
2217 let Predicates = [prd] in
2218 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
2219 VTInfo.info512, IsCommutable>, EVEX_V512;
2221 let Predicates = [prd, HasVLX] in {
2222 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
2223 VTInfo.info256, IsCommutable>, EVEX_V256;
2224 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
2225 VTInfo.info128, IsCommutable>, EVEX_V128;
2229 // This node re-declares ISD::SETCC with SDNPCommutative so that setcc is
2230 // treated as commutable, to help match loads in both operands for PCMPEQ.
2231 def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
// Fragment matching a setcc whose condition code is SETGT. It is built on the
// plain setcc node, not on X86setcc_commute.
2232 def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
2233 (setcc node:$src1, node:$src2, SETGT)>;
2235 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
2236 // increase the pattern complexity the way an immediate would.
// The byte and word forms use avx512_icmp_packed_vl and require HasBWI; the
// dword and qword forms use avx512_icmp_packed_rmb_vl (adding the
// broadcast-memory forms) and require HasAVX512. The VPCMPEQ instantiations
// pass 1 for IsCommutable; the VPCMPGT instantiations leave it at its default.
// NOTE(review): the closing brace of the let block is not visible in this
// excerpt (embedded numbering skips 2269 -> 2272).
2237 let AddedComplexity = 2 in {
2238 // FIXME: Is there a better scheduler class for VPCMP?
2239 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
2240 SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
2241 EVEX_CD8<8, CD8VF>, VEX_WIG;
2243 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
2244 SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
2245 EVEX_CD8<16, CD8VF>, VEX_WIG;
2247 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
2248 SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
2249 EVEX_CD8<32, CD8VF>;
2251 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
2252 SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
2253 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2255 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
2256 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2257 EVEX_CD8<8, CD8VF>, VEX_WIG;
2259 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
2260 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2261 EVEX_CD8<16, CD8VF>, VEX_WIG;
2263 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
2264 SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
2265 EVEX_CD8<32, CD8VF>;
2267 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
2268 SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
2269 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed integer compare with an immediate condition code (u8imm:$cc) that
// writes a mask register. Defines:
//   rri / rmi    - unmasked register and memory forms, matched through Frag;
//   rrik / rmik  - write-masked forms (EVEX_K), matched as an and of
//                  _.KRCWM:$mask with a Frag_su compare.
// The register forms are marked isCommutable. Two extra patterns handle a load
// in the first compare operand: they match through CommFrag / CommFrag_su with
// the operands swapped and select rmi / rmik, passing the condition code
// through CommFrag.OperandTransform.
//   Name - base record name; the patterns look instructions up as
//          Name # _.ZSuffix # rmi / rmik.
// NOTE(review): several pattern lines of the four instruction records are not
// visible in this excerpt (the embedded numbering skips, for example
// 2281 -> 2284, 2289 -> 2292 and 2313 -> 2317), and neither is the closing
// brace (2331 -> 2334). Read the full definition before changing anything.
2272 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
2273 PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
2274 X86FoldableSchedWrite sched,
2275 X86VectorVTInfo _, string Name> {
2276 let isCommutable = 1 in
2277 def rri : AVX512AIi8<opc, MRMSrcReg,
2278 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2279 !strconcat("vpcmp", Suffix,
2280 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2281 [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
2284 EVEX_4V, Sched<[sched]>;
2285 def rmi : AVX512AIi8<opc, MRMSrcMem,
2286 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2287 !strconcat("vpcmp", Suffix,
2288 "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
2289 [(set _.KRC:$dst, (_.KVT
2292 (_.VT (_.LdFrag addr:$src2)),
2294 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
2295 let isCommutable = 1 in
2296 def rrik : AVX512AIi8<opc, MRMSrcReg,
2297 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2299 !strconcat("vpcmp", Suffix,
2300 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2301 "$dst {${mask}}, $src1, $src2, $cc}"),
2302 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2303 (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
2306 EVEX_4V, EVEX_K, Sched<[sched]>;
2307 def rmik : AVX512AIi8<opc, MRMSrcMem,
2308 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2310 !strconcat("vpcmp", Suffix,
2311 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2312 "$dst {${mask}}, $src1, $src2, $cc}"),
2313 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2317 (_.VT (_.LdFrag addr:$src2)),
2319 EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2321 def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
2322 (_.VT _.RC:$src1), cond)),
2323 (!cast<Instruction>(Name#_.ZSuffix#"rmi")
2324 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
2326 def : Pat<(and _.KRCWM:$mask,
2327 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
2328 (_.VT _.RC:$src1), cond))),
2329 (!cast<Instruction>(Name#_.ZSuffix#"rmik")
2330 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2331 (CommFrag.OperandTransform $cc))>;
2334 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2335 PatFrag Frag_su, PatFrag CommFrag,
2336 PatFrag CommFrag_su, X86FoldableSchedWrite sched,
2337 X86VectorVTInfo _, string Name> :
2338 avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2340 def rmib : AVX512AIi8<opc, MRMSrcMem,
2341 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2343 !strconcat("vpcmp", Suffix,
2344 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2345 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2346 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2348 (_.BroadcastLdFrag addr:$src2),
2350 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2351 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2352 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2353 _.ScalarMemOp:$src2, u8imm:$cc),
2354 !strconcat("vpcmp", Suffix,
2355 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2356 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2357 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2360 (_.BroadcastLdFrag addr:$src2),
2362 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Select the unmasked broadcast form "rmib" when the broadcast load is the
// FIRST operand of the compare. Register goes to $src1, address to $src2,
// and the immediate is rewritten through CommFrag's OperandTransform.
2364 def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
2365 (_.VT _.RC:$src1), cond)),
2366 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2367 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
// Masked broadcast form: (and mask, compare) with the broadcast load as the
// FIRST compare operand selects "rmibk". The immediate is rewritten through
// the OperandTransform of the matched fragment, CommFrag_su.
2369 def : Pat<(and _.KRCWM:$mask,
2370 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
2371 (_.VT _.RC:$src1), cond))),
2372 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2373 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2374 (CommFrag_su.OperandTransform $cc))>;
2377 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2378 PatFrag Frag_su, PatFrag CommFrag,
2379 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2380 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2381 let Predicates = [prd] in
2382 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2383 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2385 let Predicates = [prd, HasVLX] in {
2386 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2387 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2388 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2389 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2393 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2394 PatFrag Frag_su, PatFrag CommFrag,
2395 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2396 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2397 let Predicates = [prd] in
2398 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2399 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2401 let Predicates = [prd, HasVLX] in {
2402 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2403 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2404 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2405 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2409 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2410 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2411 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2412 return getI8Imm(SSECC, SDLoc(N));
2415 // Swapped operand version of the above.
2416 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2417 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2418 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2419 SSECC = X86::getSwappedVPCMPImm(SSECC);
2420 return getI8Imm(SSECC, SDLoc(N));
2423 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2424 (setcc node:$src1, node:$src2, node:$cc), [{
2425 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2426 return !ISD::isUnsignedIntSetCC(CC);
2429 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2430 (setcc node:$src1, node:$src2, node:$cc), [{
2431 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2432 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2435 // Same as above, but commutes immediate. Use for load folding.
// Matches a setcc whose condition code is NOT an unsigned integer comparison
// (the predicate rejects ISD::isUnsignedIntSetCC). The attached operand
// transform is X86pcmpm_imm_commute, so patterns that use
// `.OperandTransform` get the swapped-operand immediate.
2436 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2437 (setcc node:$src1, node:$src2, node:$cc), [{
2438 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2439 return !ISD::isUnsignedIntSetCC(CC);
2440 }], X86pcmpm_imm_commute>;
// Single-use variant of X86pcmpm_commute: additionally requires
// N->hasOneUse(). Same operand transform (X86pcmpm_imm_commute).
2442 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2443 (setcc node:$src1, node:$src2, node:$cc), [{
2444 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2445 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2446 }], X86pcmpm_imm_commute>;
2448 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2449 (setcc node:$src1, node:$src2, node:$cc), [{
2450 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2451 return ISD::isUnsignedIntSetCC(CC);
2454 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2455 (setcc node:$src1, node:$src2, node:$cc), [{
2456 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2457 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2460 // Same as above, but commutes immediate. Use for load folding.
// Matches a setcc whose condition code IS an unsigned integer comparison
// (ISD::isUnsignedIntSetCC). It reuses X86pcmpm_imm_commute as its operand
// transform; there is no separate unsigned transform in this section.
2461 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2462 (setcc node:$src1, node:$src2, node:$cc), [{
2463 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2464 return ISD::isUnsignedIntSetCC(CC);
2465 }], X86pcmpm_imm_commute>;
// Single-use variant of X86pcmpum_commute: additionally requires
// N->hasOneUse(). Same operand transform (X86pcmpm_imm_commute).
2467 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2468 (setcc node:$src1, node:$src2, node:$cc), [{
2469 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2470 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2471 }], X86pcmpm_imm_commute>;
2473 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
2474 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2475 X86pcmpm_commute, X86pcmpm_commute_su,
2476 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2478 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2479 X86pcmpum_commute, X86pcmpum_commute_su,
2480 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
// 16-bit element compares. Signed (VPCMPW, opcode 0x3F) uses the X86pcmpm
// fragment family, unsigned (VPCMPUW, opcode 0x3E) uses the X86pcmpum family.
// Both are gated on HasBWI and use avx512_icmp_cc_vl, i.e. the variant
// without the broadcast-memory ("rmib"/"rmibk") forms.
2483 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2484 X86pcmpm_commute, X86pcmpm_commute_su,
2485 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2486 VEX_W, EVEX_CD8<16, CD8VF>;
2487 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2488 X86pcmpum_commute, X86pcmpum_commute_su,
2489 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2490 VEX_W, EVEX_CD8<16, CD8VF>;
// 32-bit and 64-bit element compares. Signed forms use opcode 0x1F with the
// X86pcmpm fragment family; unsigned forms use opcode 0x1E with the X86pcmpum
// family. All four are gated on HasAVX512 and go through
// avx512_icmp_cc_rmb_vl, which adds the broadcast-memory forms on top of the
// reg/mem ones. The 64-bit variants additionally carry VEX_W.
2492 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2493 X86pcmpm_commute, X86pcmpm_commute_su,
2494 SchedWriteVecALU, avx512vl_i32_info,
2495 HasAVX512>, EVEX_CD8<32, CD8VF>;
2496 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2497 X86pcmpum_commute, X86pcmpum_commute_su,
2498 SchedWriteVecALU, avx512vl_i32_info,
2499 HasAVX512>, EVEX_CD8<32, CD8VF>;
2501 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2502 X86pcmpm_commute, X86pcmpm_commute_su,
2503 SchedWriteVecALU, avx512vl_i64_info,
2504 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2505 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2506 X86pcmpum_commute, X86pcmpum_commute_su,
2507 SchedWriteVecALU, avx512vl_i64_info,
2508 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2510 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2511 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2512 return N->hasOneUse();
2514 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2515 (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2516 return N->hasOneUse();
2519 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2520 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2521 return getI8Imm(Imm, SDLoc(N));
2524 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2526 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2527 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2528 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2530 "$cc, $src2, $src1", "$src1, $src2, $cc",
2531 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2532 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2535 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2536 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2538 "$cc, $src2, $src1", "$src1, $src2, $cc",
2539 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2541 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2543 Sched<[sched.Folded, sched.ReadAfterFold]>;
2545 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2547 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2549 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2550 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2551 (X86any_cmpm (_.VT _.RC:$src1),
2552 (_.VT (_.BroadcastLdFrag addr:$src2)),
2554 (X86cmpm_su (_.VT _.RC:$src1),
2555 (_.VT (_.BroadcastLdFrag addr:$src2)),
2557 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2560 // Patterns for selecting with loads in other operand.
2561 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2563 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2564 (X86cmpm_imm_commute timm:$cc))>;
2566 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2569 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2570 _.RC:$src1, addr:$src2,
2571 (X86cmpm_imm_commute timm:$cc))>;
// Unmasked FP compare with the broadcast load as the FIRST operand: select
// the "rmbi" form with the register as $src1 and the address as $src2, and
// rewrite the immediate with X86cmpm_imm_commute to account for the swap.
2573 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2574 (_.VT _.RC:$src1), timm:$cc),
2575 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2576 (X86cmpm_imm_commute timm:$cc))>;
2578 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2581 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2582 _.RC:$src1, addr:$src2,
2583 (X86cmpm_imm_commute timm:$cc))>;
2586 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2587 // Comparison code form (VCMP[EQ/LT/LE/...]).
2588 let Uses = [MXCSR] in
2589 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2590 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2592 "$cc, {sae}, $src2, $src1",
2593 "$src1, $src2, {sae}, $cc",
2594 (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2595 (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2597 EVEX_B, Sched<[sched]>;
2600 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2601 let Predicates = [HasAVX512] in {
2602 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2603 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2606 let Predicates = [HasAVX512,HasVLX] in {
2607 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2608 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Packed FP compare-into-mask instantiations. Both use the SchedWriteFCmp
// scheduling classes; VCMPPD works on the f64 vector types (CD8 element size
// 64, VEX_W set) and VCMPPS on the f32 vector types (CD8 element size 32).
2612 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2613 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2614 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2615 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2617 // Patterns to select fp compares with load as first operand.
2618 let Predicates = [HasAVX512] in {
2619 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2621 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2623 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2625 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2628 // ----------------------------------------------------------------
2631 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2632 (X86Vfpclasss node:$src1, node:$src2), [{
2633 return N->hasOneUse();
2636 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2637 (X86Vfpclass node:$src1, node:$src2), [{
2638 return N->hasOneUse();
2641 // Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
2642 //                                        mask = op(mem_scalar, imm)
2643 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2644 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2646 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2647 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2648 (ins _.RC:$src1, i32u8imm:$src2),
2649 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2650 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2651 (i32 timm:$src2)))]>,
2653 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2654 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2655 OpcodeStr##_.Suffix#
2656 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2657 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2658 (X86Vfpclasss_su (_.VT _.RC:$src1),
2659 (i32 timm:$src2))))]>,
2660 EVEX_K, Sched<[sched]>;
2661 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2662 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2663 OpcodeStr##_.Suffix##
2664 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2666 (X86Vfpclasss _.ScalarIntMemCPat:$src1,
2667 (i32 timm:$src2)))]>,
2668 Sched<[sched.Folded, sched.ReadAfterFold]>;
2669 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2670 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2671 OpcodeStr##_.Suffix##
2672 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2673 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2674 (X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
2675 (i32 timm:$src2))))]>,
2676 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2680 // Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
2681 //                                        mask = fpclass(mem_vec, imm)
2682 //                                        mask = fpclass(broadcast(eltVt), imm)
2683 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2684 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2686 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2687 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2688 (ins _.RC:$src1, i32u8imm:$src2),
2689 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2690 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2691 (i32 timm:$src2)))]>,
2693 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2694 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2695 OpcodeStr##_.Suffix#
2696 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2697 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2698 (X86Vfpclass_su (_.VT _.RC:$src1),
2699 (i32 timm:$src2))))]>,
2700 EVEX_K, Sched<[sched]>;
2701 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2702 (ins _.MemOp:$src1, i32u8imm:$src2),
2703 OpcodeStr##_.Suffix#"{"#mem#"}"#
2704 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2705 [(set _.KRC:$dst,(X86Vfpclass
2706 (_.VT (_.LdFrag addr:$src1)),
2707 (i32 timm:$src2)))]>,
2708 Sched<[sched.Folded, sched.ReadAfterFold]>;
2709 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2711 OpcodeStr##_.Suffix#"{"#mem#"}"#
2712 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2713 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2714 (_.VT (_.LdFrag addr:$src1)),
2715 (i32 timm:$src2))))]>,
2716 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2717 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2718 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2719 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2720 _.BroadcastStr##", $dst|$dst, ${src1}"
2721 ##_.BroadcastStr##", $src2}",
2722 [(set _.KRC:$dst,(X86Vfpclass
2723 (_.VT (_.BroadcastLdFrag addr:$src1)),
2724 (i32 timm:$src2)))]>,
2725 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2726 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2727 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2728 OpcodeStr##_.Suffix##"\t{$src2, ${src1}"##
2729 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2730 _.BroadcastStr##", $src2}",
2731 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2732 (_.VT (_.BroadcastLdFrag addr:$src1)),
2733 (i32 timm:$src2))))]>,
2734 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2737 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
// Assembler aliases that accept the mnemonic with the extra `mem` suffix
// (OpcodeStr # Suffix # mem) for the register forms (rr, rrk) and the
// broadcast forms (rmb, rmbk), in unmasked and masked flavours.
// NOTE(review): the trailing `0, "att"` arguments presumably mark the alias
// as parse-only and restrict it to AT&T syntax - confirm against the
// InstAlias class definition.
2739 def : InstAlias<OpcodeStr#_.Suffix#mem#
2740 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2741 (!cast<Instruction>(NAME#"rr")
2742 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2743 def : InstAlias<OpcodeStr#_.Suffix#mem#
2744 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2745 (!cast<Instruction>(NAME#"rrk")
2746 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2747 def : InstAlias<OpcodeStr#_.Suffix#mem#
2748 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2749 _.BroadcastStr#", $src2}",
2750 (!cast<Instruction>(NAME#"rmb")
2751 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2752 def : InstAlias<OpcodeStr#_.Suffix#mem#
2753 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2754 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2755 (!cast<Instruction>(NAME#"rmbk")
2756 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2759 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2760 bits<8> opc, X86SchedWriteWidths sched,
2762 let Predicates = [prd] in {
2763 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2764 _.info512, "z">, EVEX_V512;
2766 let Predicates = [prd, HasVLX] in {
2767 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2768 _.info128, "x">, EVEX_V128;
2769 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2770 _.info256, "y">, EVEX_V256;
2774 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2775 bits<8> opcScalar, X86SchedWriteWidths sched,
2777 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2779 EVEX_CD8<32, CD8VF>;
2780 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2782 EVEX_CD8<64, CD8VF> , VEX_W;
2783 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2784 sched.Scl, f32x_info, prd>, VEX_LIG,
2785 EVEX_CD8<32, CD8VT1>;
2786 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2787 sched.Scl, f64x_info, prd>, VEX_LIG,
2788 EVEX_CD8<64, CD8VT1>, VEX_W;
// Instantiate every vfpclass form: 0x66 is the vector opcode (opcVec) and
// 0x67 the scalar opcode (opcScalar). HasDQI is the last argument; inside
// avx512_fp_fpclass_all it is forwarded as the predicate `prd`.
2791 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2792 HasDQI>, AVX512AIi8Base, EVEX;
2794 //-----------------------------------------------------------------
2795 // Mask register copy, including
2796 // - copy between mask registers
2797 // - load/store mask registers
2798 // - copy from GPR to mask register and vice versa
2800 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2801 string OpcodeStr, RegisterClass KRC,
2802 ValueType vvt, X86MemOperand x86memop> {
2803 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2804 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2805 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2807 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2808 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2811 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2812 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2813 [(store KRC:$src, addr:$dst)]>,
2814 Sched<[WriteStore]>;
2817 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2819 RegisterClass KRC, RegisterClass GRC> {
2820 let hasSideEffects = 0 in {
2821 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2822 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2824 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2825 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2830 let Predicates = [HasDQI] in
2831 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2832 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2835 let Predicates = [HasAVX512] in
2836 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2837 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2840 let Predicates = [HasBWI] in {
2841 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2843 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2845 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2847 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2851 // GR from/to mask register
// 16-bit and 8-bit bitconverts go through a 32-bit GPR: the narrow GPR is
// inserted into an undefined i32 before the copy to the mask class, and the
// reverse direction copies to GR32 and then extracts the low sub-register.
2852 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2853 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2854 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2855 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2857 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2858 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2859 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2860 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// v16i1 -> wider integer. zext selects KMOVWrk (wrapped in SUBREG_TO_REG for
// i64); anyext only needs a register-class copy (inserted into an undefined
// i64 for the 64-bit case).
2862 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2863 (KMOVWrk VK16:$src)>;
2864 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2865 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2866 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2867 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2868 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2869 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
// v8i1 -> wider integer. The zext patterns select KMOVBrk and therefore
// require HasDQI; the anyext patterns are plain copies with no such
// requirement.
2871 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2872 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2873 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2874 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2875 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2876 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2877 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2878 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32-bit and 64-bit masks match a GPR width exactly, so the bitconvert is a
// single register-class copy in either direction.
2880 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2881 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2882 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2883 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2884 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2885 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2886 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2887 (COPY_TO_REGCLASS VK64:$src, GR64)>;
2890 let Predicates = [HasDQI] in {
// Store of a v1i1 mask: copy the value to VK8 and store it with KMOVBmk.
2891 def : Pat<(store VK1:$src, addr:$dst),
2892 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
// Loads of v1i1/v2i1/v4i1 masks: load with KMOVBkm, then copy the result to
// the narrower mask register class.
2894 def : Pat<(v1i1 (load addr:$src)),
2895 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2896 def : Pat<(v2i1 (load addr:$src)),
2897 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2898 def : Pat<(v4i1 (load addr:$src)),
2899 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2902 let Predicates = [HasAVX512] in {
2903 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2904 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2905 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2906 (KMOVWkm addr:$src)>;
2909 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2910 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2911 SDTCVecEltisVT<1, i1>,
2914 let Predicates = [HasAVX512] in {
2915 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2916 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2917 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2919 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2920 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2922 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2923 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2925 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2926 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
// Instantiate the GPR <-> mask copy lowering patterns for every mask width,
// pairing each mask register class with its matching vNi1 value type.
2929 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2930 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2931 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2932 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2933 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2934 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2935 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2937 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2938 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2941 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2945 // Mask unary operation
2947 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2948 RegisterClass KRC, SDPatternOperator OpNode,
2949 X86FoldableSchedWrite sched, Predicate prd> {
2950 let Predicates = [prd] in
2951 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2952 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2953 [(set KRC:$dst, (OpNode KRC:$src))]>,
2957 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2958 SDPatternOperator OpNode,
2959 X86FoldableSchedWrite sched> {
2960 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2961 sched, HasDQI>, VEX, PD;
2962 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2963 sched, HasAVX512>, VEX, PS;
2964 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2965 sched, HasBWI>, VEX, PD, VEX_W;
2966 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2967 sched, HasBWI>, VEX, PS, VEX_W;
2970 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2971 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2973 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Each pattern widens the mask to VK16, applies KNOTWrr, and copies the
// result back to the ORIGINAL mask class (VK8/VK4/VK2).
// The `let` below has no braces, so it applies to the VK8 pattern only; the
// VK4 and VK2 patterns carry no predicate list of their own in these lines.
2974 let Predicates = [HasAVX512, NoDQI] in
2975 def : Pat<(vnot VK8:$src),
2976 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2978 def : Pat<(vnot VK4:$src),
2979 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2980 def : Pat<(vnot VK2:$src),
2981 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2983 // Mask binary operation
2984 // - KAND, KANDN, KOR, KXNOR, KXOR
2985 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2986 RegisterClass KRC, SDPatternOperator OpNode,
2987 X86FoldableSchedWrite sched, Predicate prd,
2989 let Predicates = [prd], isCommutable = IsCommutable in
2990 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2991 !strconcat(OpcodeStr,
2992 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2993 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
2997 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2998 SDPatternOperator OpNode,
2999 X86FoldableSchedWrite sched, bit IsCommutable,
3000 Predicate prdW = HasAVX512> {
3001 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3002 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
3003 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3004 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
3005 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3006 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
3007 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3008 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// Composite logic fragments used by the mask binary operations below.
//   andn(i0, i1) = and(not(i0), i1)   - note that the FIRST operand is inverted
//   xnor(i0, i1) = not(xor(i0, i1))
3011 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
3012 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
3013 // These nodes use 'vnot' instead of 'not' to support vectors.
3014 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3015 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3017 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// Mask-register binary instructions. Arguments are: opcode, mnemonic stem,
// selection node, scheduling class, IsCommutable flag and (optionally) the
// predicate for the 16-bit "W" form. KANDN is the only one with
// IsCommutable = 0; KADD overrides the W-form predicate with HasDQI, the
// others keep the default HasAVX512.
3018 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3019 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3020 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3021 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3022 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3023 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
3025 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
3027 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3028 // for the DQI set, this type is legal and KxxxB instruction is used
3029 let Predicates = [NoDQI] in
3030 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3032 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3033 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3035 // All types smaller than 8 bits require conversion anyway
// 1-bit masks use the scalar OpNode (not VOpNode): widen both operands to
// VK16, apply the 16-bit instruction, and copy the result back to VK1.
3036 def : Pat<(OpNode VK1:$src1, VK1:$src2),
3037 (COPY_TO_REGCLASS (Inst
3038 (COPY_TO_REGCLASS VK1:$src1, VK16),
3039 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// 2-bit masks: widen both operands to VK16, apply the 16-bit instruction, and
// copy the result back to VK2, the register class of the operands being
// matched (this previously named VK1, a copy/paste slip from the 1-bit
// pattern).
3040 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3041 (COPY_TO_REGCLASS (Inst
3042 (COPY_TO_REGCLASS VK2:$src1, VK16),
3043 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
// 4-bit masks: widen both operands to VK16, apply the 16-bit instruction, and
// copy the result back to VK4, the register class of the operands being
// matched (this previously named VK1, a copy/paste slip from the 1-bit
// pattern).
3044 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3045 (COPY_TO_REGCLASS (Inst
3046 (COPY_TO_REGCLASS VK4:$src1, VK16),
3047 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Narrow-mask promotion patterns for each logic op. The first argument is the
// vector node (VOpNode), the second the scalar node (OpNode, used for VK1),
// and the third the 16-bit instruction that the operands are widened to.
3050 defm : avx512_binop_pat<and, and, KANDWrr>;
3051 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3052 defm : avx512_binop_pat<or, or, KORWrr>;
3053 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3054 defm : avx512_binop_pat<xor, xor, KXORWrr>;
3057 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3058 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3060 let Predicates = [prd] in {
3061 let hasSideEffects = 0 in
3062 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3063 (ins Src.KRC:$src1, Src.KRC:$src2),
3064 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3065 VEX_4V, VEX_L, Sched<[sched]>;
// concat_vectors of two source-width masks selects the KUNPCK instruction.
// Note the operand swap: the pattern's $src1 becomes the instruction's SECOND
// operand and $src2 its FIRST.
3067 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3068 (!cast<Instruction>(NAME##rr) Src.KRC:$src2, Src.KRC:$src1)>;
// Mask unpack instantiations: each combines two masks of the source width
// into one mask of twice that width (v8i1 -> v16i1, v16i1 -> v32i1,
// v32i1 -> v64i1). KUNPCKBW is gated on HasAVX512, the wider two on HasBWI.
3072 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3073 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3074 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
3077 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3078 SDNode OpNode, X86FoldableSchedWrite sched,
3080 let Predicates = [prd], Defs = [EFLAGS] in
3081 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3082 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3083 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3087 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3088 X86FoldableSchedWrite sched,
3089 Predicate prdW = HasAVX512> {
3090 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3092 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3094 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3096 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3100 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// Mask test instructions that set EFLAGS. KTEST passes HasDQI as `prdW`, the
// predicate for its 16-bit "W" form; KORTEST keeps the default HasAVX512.
3101 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3102 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
3105 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3106 SDNode OpNode, X86FoldableSchedWrite sched> {
3107 let Predicates = [HasAVX512] in
3108 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3109 !strconcat(OpcodeStr,
3110 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3111 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
// Instantiates avx512_mask_shiftop for the four mask widths. W (VK16) and
// B (VK8) use opc1; Q (VK64) and D (VK32) use opc2. B is wrapped in
// `Predicates = [HasDQI]`, Q and D in `Predicates = [HasBWI]`.
3115 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3116 SDNode OpNode, X86FoldableSchedWrite sched> {
3117 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3118 sched>, VEX, TAPD, VEX_W;
3119 let Predicates = [HasDQI] in
3120 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3122 let Predicates = [HasBWI] in {
3123 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3124 sched>, VEX, TAPD, VEX_W;
3125 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
// Mask shifts: KSHIFTL uses opcodes 0x32 (opc1) / 0x33 (opc2), KSHIFTR uses
// 0x30 / 0x31. Both are scheduled as WriteShuffle.
3130 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3131 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3133 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Each pattern places the Narrow sources into otherwise-undefined Wide
// registers (INSERT_SUBREG into IMPLICIT_DEF at Narrow.SubRegIdx), selects the
// Wide instruction InstStr#"Zrri" (InstStr#"Zrrik" when the compare is ANDed
// with a mask), and names Narrow.KRC as the class of the resulting mask.
// Frag / Frag_su supply the OperandTransform applied to the matched $cc.
3134 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3136 X86VectorVTInfo Narrow,
3137 X86VectorVTInfo Wide> {
// Unmasked register-register compare.
3138 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3139 (Narrow.VT Narrow.RC:$src2), cond)),
3141 (!cast<Instruction>(InstStr#"Zrri")
3142 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3143 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3144 (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Compare ANDed with a mask: the Narrow mask is copied to Wide.KRC and passed
// to the masked instruction form.
3146 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3147 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3148 (Narrow.VT Narrow.RC:$src2),
3150 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3151 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3152 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3153 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3154 (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
// Integer compare lowering to the Wide instruction when one operand is a
// broadcast load (Narrow.BroadcastLdFrag addr:$src2). The register operand is
// inserted into an undefined Wide register and the memory operand is passed
// through as addr:$src2. Selects InstStr#"Zrmib", or InstStr#"Zrmibk" when the
// compare is ANDed with a mask (the mask is copied to Wide.KRC first).
// CommFrag / CommFrag_su match the broadcast load in the first operand
// position and supply their own OperandTransform for $cc.
3157 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3158 PatFrag CommFrag, PatFrag CommFrag_su,
3160 X86VectorVTInfo Narrow,
3161 X86VectorVTInfo Wide> {
// Broadcast load as the second operand, unmasked.
3163 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3164 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3166 (!cast<Instruction>(InstStr#"Zrmib")
3167 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3168 addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Broadcast load as the second operand, ANDed with a mask.
3170 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3172 (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3173 (Narrow.BroadcastLdFrag addr:$src2),
3175 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3176 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3177 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3178 addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3180 // Commuted with broadcast load.
3181 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3182 (Narrow.VT Narrow.RC:$src1),
3185 (!cast<Instruction>(InstStr#"Zrmib")
3186 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3187 addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
// Commuted broadcast load, ANDed with a mask.
3189 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3191 (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3192 (Narrow.VT Narrow.RC:$src1),
3194 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3195 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3196 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3197 addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3200 // Same as above, but for fp types which don't use PatFrags.
// Unmasked patterns match X86any_cmpm; patterns ANDed with a mask match
// X86cmpm_su. The condition code is passed through as timm:$cc, except in the
// commuted broadcast patterns, which wrap it in X86cmpm_imm_commute.
// Instruction suffixes: "Zrri"/"Zrrik" for register sources and
// "Zrmbi"/"Zrmbik" for a broadcast-load source.
3201 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3202 X86VectorVTInfo Narrow,
3203 X86VectorVTInfo Wide> {
// Register-register, unmasked.
3204 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3205 (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3207 (!cast<Instruction>(InstStr#"Zrri")
3208 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3209 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3210 timm:$cc), Narrow.KRC)>;
// Register-register, ANDed with a mask.
3212 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214 (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3215 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3216 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3219 timm:$cc), Narrow.KRC)>;
// Broadcast load as the second operand, unmasked.
3222 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT Narrow.RC:$src1),
3223 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3225 (!cast<Instruction>(InstStr#"Zrmbi")
3226 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3227 addr:$src2, timm:$cc), Narrow.KRC)>;
// Broadcast load as the second operand, ANDed with a mask.
3229 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3230 (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3231 (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3232 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3233 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3234 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3235 addr:$src2, timm:$cc), Narrow.KRC)>;
3237 // Commuted with broadcast load.
3238 def : Pat<(Narrow.KVT (X86any_cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3239 (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3241 (!cast<Instruction>(InstStr#"Zrmbi")
3242 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3243 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Commuted broadcast load, ANDed with a mask.
3245 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3246 (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3247 (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3248 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3249 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3250 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3251 addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// With AVX512 but no VLX: instantiate the widening compare lowerings for
// dword/qword integer vectors (signed X86pcmpm and unsigned X86pcmpum, both
// register and broadcast-load forms) and for packed single/double FP vectors.
// Narrow infos are the 128/256-bit "x" types; Wide infos are the 512-bit types.
3254 let Predicates = [HasAVX512, NoVLX] in {
3255 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3256 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3258 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3259 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3261 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3262 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3264 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3265 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3267 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3268 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3270 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3271 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3273 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3274 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3276 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3277 defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3279 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3280 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3281 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3282 defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// With BWI but no VLX: the same widening lowering for byte and word element
// compares. Only the register-register multiclass is instantiated here; no
// broadcast-load variant is instantiated for these element sizes.
3285 let Predicates = [HasBWI, NoVLX] in {
3286 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3287 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3289 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3290 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3292 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3293 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3295 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3296 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3299 // Mask setting all 0s or 1s
// Defines a pseudo instruction (opcode 0, Pseudo format, empty asm string,
// no inputs) whose pattern sets KRC:$dst to the constant `Val` of type VT.
// It is flagged rematerializable and as cheap as a move, uses the WriteZero
// scheduling class, and is gated on HasAVX512.
3300 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3301 let Predicates = [HasAVX512] in
3302 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3303 SchedRW = [WriteZero] in
3304 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3305 [(set KRC:$dst, (VT Val))]>;
// Instantiates the mask-constant pseudo for the 16-, 32- and 64-bit mask
// classes (W/D/Q suffixes). No 8-bit (or narrower) variant is defined here.
3308 multiclass avx512_mask_setop_w<PatFrag Val> {
3309 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3310 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3311 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
// KSET0{W,D,Q} materialize an all-zeros mask; KSET1{W,D,Q} an all-ones mask.
3314 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3315 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3317 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// All-zeros / all-ones constants of the v8i1, v4i1, v2i1 and v1i1 types reuse
// the 16-bit KSET0W / KSET1W pseudos and copy the result to the narrower mask
// register class.
3318 let Predicates = [HasAVX512] in {
3319 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3320 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3321 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3322 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3323 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3324 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3325 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3326 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3329 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are expressed purely as COPY_TO_REGCLASS between the
// sub-mask class (subRC) and the full mask class (RC); the output patterns
// name no instruction. The insert pattern only matches insertion into undef.
3330 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3331 RegisterClass RC, ValueType VT> {
3332 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3333 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3335 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3336 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the index-0 insert/extract lowering for every pair of mask
// widths (sub, full) with sub < full, over the widths 1, 2, 4, 8, 16, 32, 64.
3338 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3339 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3340 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3341 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3342 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3343 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3345 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3346 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3347 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3348 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3349 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3351 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3352 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3353 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3354 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3356 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3357 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3358 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3360 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3361 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3363 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3365 //===----------------------------------------------------------------------===//
3366 // AVX-512 - Aligned and unaligned load and store
// Load/move template for one vector type `_`. Defines:
//   rr    register move, empty pattern, isMoveReg = 1
//   rrkz  zero-masked register move, selected via SelectOprr
//   rm    load from memory; its pattern list is empty when NoRMPattern is set
//   rrk   merge-masked register move, selected via SelectOprr
//   rmk   merge-masked load, selected via vselect over ld_frag
//   rmkz  zero-masked load, selected via vselect over ld_frag
// rrk and rmk are declared under `Constraints = "$src0 = $dst"`.
// EVEX2VEXOvrd is forwarded to EVEX2VEXOverride on the rr and rm forms.
// The trailing patterns map `mload` onto the instructions looked up by name
// as Name # _.ZSuffix # <form>.
3369 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3370 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3371 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3372 bit NoRMPattern = 0,
3373 SDPatternOperator SelectOprr = vselect> {
3374 let hasSideEffects = 0 in {
3375 let isMoveReg = 1 in
3376 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3377 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3378 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3379 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3380 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3381 (ins _.KRCWM:$mask, _.RC:$src),
3382 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3383 "${dst} {${mask}} {z}, $src}"),
3384 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3386 _.ImmAllZerosV)))], _.ExeDomain>,
3387 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
// Plain load: foldable as a load and rematerializable.
3389 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3390 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3391 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3392 !if(NoRMPattern, [],
3394 (_.VT (ld_frag addr:$src)))]),
3395 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3396 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masking forms: $src0 supplies the passthru value and is tied to $dst.
3398 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3399 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3400 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3401 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3402 "${dst} {${mask}}, $src1}"),
3403 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3405 (_.VT _.RC:$src0))))], _.ExeDomain>,
3406 EVEX, EVEX_K, Sched<[Sched.RR]>;
3407 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3408 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3409 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3410 "${dst} {${mask}}, $src1}"),
3411 [(set _.RC:$dst, (_.VT
3412 (vselect _.KRCWM:$mask,
3413 (_.VT (ld_frag addr:$src1)),
3414 (_.VT _.RC:$src0))))], _.ExeDomain>,
3415 EVEX, EVEX_K, Sched<[Sched.RM]>;
3417 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3418 (ins _.KRCWM:$mask, _.MemOp:$src),
3419 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3420 "${dst} {${mask}} {z}, $src}",
3421 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3422 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3423 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// Masked load with an undef passthru -> zero-masking load.
3425 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3426 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
// Masked load with an all-zeros passthru -> zero-masking load.
3428 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3429 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
// Masked load with a register passthru -> merge-masking load.
3431 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3432 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3433 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load family across vector lengths. Z (512-bit info) is gated on
// [prd]; Z256 and Z128 on [prd, HasVLX]. Each width uses its own
// AlignedLdFrag together with masked_load_aligned. The 512-bit form passes ""
// as the EVEX2VEX override string; the 256-bit form appends "Y" to
// EVEX2VEXOvrd and the 128-bit form passes it unchanged.
3436 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3437 AVX512VLVectorVTInfo _, Predicate prd,
3438 X86SchedWriteMoveLSWidths Sched,
3439 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3440 let Predicates = [prd] in
3441 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3442 _.info512.AlignedLdFrag, masked_load_aligned,
3443 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3445 let Predicates = [prd, HasVLX] in {
3446 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3447 _.info256.AlignedLdFrag, masked_load_aligned,
3448 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3449 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3450 _.info128.AlignedLdFrag, masked_load_aligned,
3451 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Unaligned-load family across vector lengths. Same structure as the aligned
// variant, but uses each width's LdFrag with masked_load, and additionally
// forwards SelectOprr (default vselect) to avx512_load.
3455 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3456 AVX512VLVectorVTInfo _, Predicate prd,
3457 X86SchedWriteMoveLSWidths Sched,
3458 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3459 SDPatternOperator SelectOprr = vselect> {
3460 let Predicates = [prd] in
3461 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3462 masked_load, Sched.ZMM, "",
3463 NoRMPattern, SelectOprr>, EVEX_V512;
3465 let Predicates = [prd, HasVLX] in {
3466 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3467 masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3468 NoRMPattern, SelectOprr>, EVEX_V256;
3469 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3470 masked_load, Sched.XMM, EVEX2VEXOvrd,
3471 NoRMPattern, SelectOprr>, EVEX_V128;
// Store/move template for one vector type `_`. Defines:
//   rr_REV, rrk_REV, rrkz_REV  MRMDestReg register forms with empty patterns,
//       declared isCodeGenOnly + ForceDisassemble; FoldGenData names the
//       corresponding BaseName # _.ZSuffix # rr / rrk / rrkz instruction
//   mr    store to memory; its pattern list is empty when NoMRPattern is set
//   mrk   masked store, empty pattern on the def itself
// followed by a pattern selecting `mstore` to the mrk instruction and three
// ".s"-suffixed InstAliases that map onto the _REV forms.
3475 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3476 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3477 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3478 bit NoMRPattern = 0> {
3479 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3480 let isMoveReg = 1 in
3481 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3482 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3483 [], _.ExeDomain>, EVEX,
3484 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3485 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3486 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3487 (ins _.KRCWM:$mask, _.RC:$src),
3488 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3489 "${dst} {${mask}}, $src}",
3490 [], _.ExeDomain>, EVEX, EVEX_K,
3491 FoldGenData<BaseName#_.ZSuffix#rrk>,
3493 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3494 (ins _.KRCWM:$mask, _.RC:$src),
3495 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3496 "${dst} {${mask}} {z}, $src}",
3497 [], _.ExeDomain>, EVEX, EVEX_KZ,
3498 FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Memory-destination forms.
3502 let hasSideEffects = 0, mayStore = 1 in
3503 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3504 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3505 !if(NoMRPattern, [],
3506 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3507 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3508 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3509 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3510 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3511 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3512 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Masked store selection.
3515 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3516 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3517 _.KRCWM:$mask, _.RC:$src)>;
// Assembler aliases: "<mnemonic>.s" maps to the corresponding _REV form.
3519 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3520 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3521 _.RC:$dst, _.RC:$src), 0>;
3522 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3523 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3524 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3525 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3526 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3527 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store family across vector lengths, using `store` and
// `masked_store`. Z (512-bit info) is gated on [prd]; Z256 and Z128 on
// [prd, HasVLX]. The 512-bit form passes "" as the EVEX2VEX override string;
// the 256-bit form appends "Y" to EVEX2VEXOvrd.
3530 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3531 AVX512VLVectorVTInfo _, Predicate prd,
3532 X86SchedWriteMoveLSWidths Sched,
3533 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3534 let Predicates = [prd] in
3535 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3536 masked_store, Sched.ZMM, "",
3537 NoMRPattern>, EVEX_V512;
3538 let Predicates = [prd, HasVLX] in {
3539 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3540 masked_store, Sched.YMM,
3541 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3542 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3543 masked_store, Sched.XMM, EVEX2VEXOvrd,
3544 NoMRPattern>, EVEX_V128;
// Aligned-store family across vector lengths, using `alignedstore` and
// `masked_store_aligned`. Z (512-bit info) is gated on [prd]; Z256 and Z128 on
// [prd, HasVLX]. The 512-bit form passes "" as the EVEX2VEX override string;
// the 256-bit form appends "Y" to EVEX2VEXOvrd.
3548 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3549 AVX512VLVectorVTInfo _, Predicate prd,
3550 X86SchedWriteMoveLSWidths Sched,
3551 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3552 let Predicates = [prd] in
3553 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3554 masked_store_aligned, Sched.ZMM, "",
3555 NoMRPattern>, EVEX_V512;
3557 let Predicates = [prd, HasVLX] in {
3558 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3559 masked_store_aligned, Sched.YMM,
3560 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3561 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3562 masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3563 NoMRPattern>, EVEX_V128;
// Packed FP moves, all gated on HasAVX512 and scheduled with
// SchedWriteFMoveLS. Aligned VMOVAPS/VMOVAPD use opcode 0x28 for the load side
// and 0x29 for the store side; unaligned VMOVUPS/VMOVUPD use 0x10 / 0x11.
// The unaligned loads pass null_frag as SelectOprr.
// NOTE(review): presumably this leaves register-register masked selects to be
// matched by the aligned variants only - confirm.
3567 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3568 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3569 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3570 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3571 PS, EVEX_CD8<32, CD8VF>;
3573 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3574 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3575 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3576 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3577 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3579 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3580 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3581 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3582 SchedWriteFMoveLS, "VMOVUPS">,
3583 PS, EVEX_CD8<32, CD8VF>;
3585 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3586 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3587 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3588 SchedWriteFMoveLS, "VMOVUPD">,
3589 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed integer moves: load opcode 0x6F and store opcode 0x7F for every
// variant, scheduled with SchedWriteVecMoveLS. VMOVDQU8 and VMOVDQU16 require
// HasBWI; the others HasAVX512. VMOVDQU8, VMOVDQU16 and VMOVDQU32 pass 1 for
// NoRMPattern / NoMRPattern, so their plain rm/mr defs carry no pattern.
// VMOVDQU32 and VMOVDQU64 pass null_frag as SelectOprr.
3591 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3592 HasAVX512, SchedWriteVecMoveLS,
3594 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3595 HasAVX512, SchedWriteVecMoveLS,
3597 PD, EVEX_CD8<32, CD8VF>;
3599 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3600 HasAVX512, SchedWriteVecMoveLS,
3602 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3603 HasAVX512, SchedWriteVecMoveLS,
3605 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3607 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3608 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3609 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3610 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3611 XD, EVEX_CD8<8, CD8VF>;
3613 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3614 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3615 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3616 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3617 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3619 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3620 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3621 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3622 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3623 XS, EVEX_CD8<32, CD8VF>;
3625 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3626 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3627 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3628 SchedWriteVecMoveLS, "VMOVDQU">,
3629 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3631 // Special instructions to help with spilling when we don't have VLX. We need
3632 // to load or store from a ZMM register instead. These are converted in
3633 // expandPostRAPseudos.
// Load pseudos: aligned (VMOVAPS) and unaligned (VMOVUPS) flavours for the
// 128-bit (VR128X/f128mem) and 256-bit (VR256X/f256mem) cases. They have no
// asm string and no selection pattern.
3634 let isReMaterializable = 1, canFoldAsLoad = 1,
3635 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3636 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3637 "", []>, Sched<[WriteFLoadX]>;
3638 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3639 "", []>, Sched<[WriteFLoadY]>;
3640 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3641 "", []>, Sched<[WriteFLoadX]>;
3642 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3643 "", []>, Sched<[WriteFLoadY]>;
// Store pseudos: the same four flavours with a memory destination.
3646 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3647 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3648 "", []>, Sched<[WriteFStoreX]>;
3649 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3650 "", []>, Sched<[WriteFStoreY]>;
3651 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3652 "", []>, Sched<[WriteFStoreX]>;
3653 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3654 "", []>, Sched<[WriteFStoreY]>;
// vselect with all-zeros as the value chosen when the mask bit is set: invert
// the mask with KNOTWrr and use the zero-masking register move instead. For
// the v8i64 case the v8i1 mask is copied to VK16 before KNOTWrr is applied.
3657 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3658 (v8i64 VR512:$src))),
3659 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3662 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3663 (v16i32 VR512:$src))),
3664 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3666 // These patterns exist to prevent the above patterns from introducing a second
3667 // mask inversion when one already exists.
3668 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3669 (v8i64 immAllZerosV),
3670 (v8i64 VR512:$src))),
3671 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3672 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3673 (v16i32 immAllZerosV),
3674 (v16i32 VR512:$src))),
3675 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a Narrow-width vselect to a Wide masked register move:
//   InstrStr#"rrk"   for select(mask, src1, src0), with src0 as the passthru
//   InstrStr#"rrkz"  for select(mask, src1, all-zeros)
// The Narrow sources are inserted into undefined Wide registers
// (INSERT_SUBREG into IMPLICIT_DEF) and the mask is copied to Wide.KRCWM.
3677 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3678 X86VectorVTInfo Wide> {
3679 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3680 Narrow.RC:$src1, Narrow.RC:$src0)),
3683 (!cast<Instruction>(InstrStr#"rrk")
3684 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3685 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3686 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3689 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3690 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3693 (!cast<Instruction>(InstrStr#"rrkz")
3694 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3695 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3699 // Patterns for handling masked selects of 128/256-bit vectors when VLX isn't
3700 // available. Use a 512-bit operation and extract.
// Dword-element types go through VMOVAPSZ / VMOVDQA32Z, qword-element types
// through VMOVAPDZ / VMOVDQA64Z.
3701 let Predicates = [HasAVX512, NoVLX] in {
3702 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3703 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3704 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3705 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3707 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3708 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3709 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3710 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte- and word-element selects are widened through VMOVDQU8Z / VMOVDQU16Z
// and are therefore gated on HasBWI (still without VLX).
3713 let Predicates = [HasBWI, NoVLX] in {
3714 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3715 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3717 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3718 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// 512-bit integer loads and stores of v16i32, v32i16 and v64i8 are all
// selected to the VMOVDQA64Z (aligned) / VMOVDQU64Z (unaligned) instructions.
3721 let Predicates = [HasAVX512] in {
3723 def : Pat<(alignedloadv16i32 addr:$src),
3724 (VMOVDQA64Zrm addr:$src)>;
3725 def : Pat<(alignedloadv32i16 addr:$src),
3726 (VMOVDQA64Zrm addr:$src)>;
3727 def : Pat<(alignedloadv64i8 addr:$src),
3728 (VMOVDQA64Zrm addr:$src)>;
3729 def : Pat<(loadv16i32 addr:$src),
3730 (VMOVDQU64Zrm addr:$src)>;
3731 def : Pat<(loadv32i16 addr:$src),
3732 (VMOVDQU64Zrm addr:$src)>;
3733 def : Pat<(loadv64i8 addr:$src),
3734 (VMOVDQU64Zrm addr:$src)>;
3737 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3738 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3739 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3740 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3741 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3742 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3743 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3744 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3745 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3746 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3747 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3748 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// With VLX, 128-bit (v4i32/v8i16/v16i8) and 256-bit (v8i32/v16i16/v32i8)
// integer loads and stores are selected to the VMOVDQA64 (aligned) /
// VMOVDQU64 (unaligned) Z128 and Z256 instructions respectively.
3751 let Predicates = [HasVLX] in {
3753 def : Pat<(alignedloadv4i32 addr:$src),
3754 (VMOVDQA64Z128rm addr:$src)>;
3755 def : Pat<(alignedloadv8i16 addr:$src),
3756 (VMOVDQA64Z128rm addr:$src)>;
3757 def : Pat<(alignedloadv16i8 addr:$src),
3758 (VMOVDQA64Z128rm addr:$src)>;
3759 def : Pat<(loadv4i32 addr:$src),
3760 (VMOVDQU64Z128rm addr:$src)>;
3761 def : Pat<(loadv8i16 addr:$src),
3762 (VMOVDQU64Z128rm addr:$src)>;
3763 def : Pat<(loadv16i8 addr:$src),
3764 (VMOVDQU64Z128rm addr:$src)>;
3767 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3768 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3769 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3770 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3771 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3772 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3773 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3774 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3775 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3776 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3777 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3778 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3781 def : Pat<(alignedloadv8i32 addr:$src),
3782 (VMOVDQA64Z256rm addr:$src)>;
3783 def : Pat<(alignedloadv16i16 addr:$src),
3784 (VMOVDQA64Z256rm addr:$src)>;
3785 def : Pat<(alignedloadv32i8 addr:$src),
3786 (VMOVDQA64Z256rm addr:$src)>;
3787 def : Pat<(loadv8i32 addr:$src),
3788 (VMOVDQU64Z256rm addr:$src)>;
3789 def : Pat<(loadv16i16 addr:$src),
3790 (VMOVDQU64Z256rm addr:$src)>;
3791 def : Pat<(loadv32i8 addr:$src),
3792 (VMOVDQU64Z256rm addr:$src)>;
3795 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3796 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3797 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3798 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3799 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3800 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3801 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3802 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3803 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3804 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3805 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3806 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3809 // Move Int Doubleword to Packed Double Int
// GPR/memory to XMM moves on opcode 0x6E: "vmovd" for 32-bit sources and
// "vmovq" (with VEX_W) for 64-bit sources. The group also holds codegen-only
// bitconvert moves between GR64 and FR64X (0x6E into FR64X, 0x7E out of it).
3811 let ExeDomain = SSEPackedInt in {
3812 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3813 "vmovd\t{$src, $dst|$dst, $src}",
3815 (v4i32 (scalar_to_vector GR32:$src)))]>,
3816 EVEX, Sched<[WriteVecMoveFromGpr]>;
3817 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3818 "vmovd\t{$src, $dst|$dst, $src}",
3820 (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3821 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3822 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3823 "vmovq\t{$src, $dst|$dst, $src}",
3825 (v2i64 (scalar_to_vector GR64:$src)))]>,
3826 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Memory-source vmovq: no selection pattern, codegen-only but still
// disassembled (ForceDisassemble).
3827 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3828 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3830 "vmovq\t{$src, $dst|$dst, $src}", []>,
3831 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3832 let isCodeGenOnly = 1 in {
3833 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3834 "vmovq\t{$src, $dst|$dst, $src}",
3835 [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3836 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3837 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3838 "vmovq\t{$src, $dst|$dst, $src}",
3839 [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3840 EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3842 } // ExeDomain = SSEPackedInt
3844 // Move Int Doubleword to Single Scalar
// Codegen-only vmovd (opcode 0x6E) used to select a bitconvert from GR32 to
// the scalar single-precision register class FR32X.
3846 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3847 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3848 "vmovd\t{$src, $dst|$dst, $src}",
3849 [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3850 EVEX, Sched<[WriteVecMoveFromGpr]>;
3851 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3853 // Move doubleword from xmm register to r/m32
// Opcode 0x7E in its destination forms: the rr def writes a GR32, the mr def
// stores to a 32-bit memory operand. Both are selected from an extractelt of a
// v4i32 source; the store pattern names element 0 explicitly.
3855 let ExeDomain = SSEPackedInt in {
3856 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3857 "vmovd\t{$src, $dst|$dst, $src}",
3858 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3860 EVEX, Sched<[WriteVecMoveToGpr]>;
3861 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3862 (ins i32mem:$dst, VR128X:$src),
3863 "vmovd\t{$src, $dst|$dst, $src}",
3864 [(store (i32 (extractelt (v4i32 VR128X:$src),
3865 (iPTR 0))), addr:$dst)]>,
3866 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3867 } // ExeDomain = SSEPackedInt
3869 // Move quadword from xmm1 register to r/m64
// Four vmovq forms that read a VR128X source. Two use opcode 0x7E (to GR64 and
// to memory) and two use opcode 0xD6 (to memory and to another XMM register).
3871 let ExeDomain = SSEPackedInt in {
// XMM -> GR64, selected from an extractelt of a v2i64 source.
3872 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3873 "vmovq\t{$src, $dst|$dst, $src}",
3874 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3876 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3877 Requires<[HasAVX512]>;
// 0x7E store form. It has an empty pattern list, so mayStore/hasSideEffects
// are set by hand; it is codegen-only, forced into the disassembler tables,
// and additionally requires In64BitMode.
3879 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3880 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3881 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3882 EVEX, VEX_W, Sched<[WriteVecStore]>,
3883 Requires<[HasAVX512, In64BitMode]>;
// 0xD6 store form, selected for a store of element 0 of a v2i64.
3885 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3886 (ins i64mem:$dst, VR128X:$src),
3887 "vmovq\t{$src, $dst|$dst, $src}",
3888 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3890 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3891 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// 0xD6 register-destination form: no pattern, codegen-only, kept for the
// disassembler.
3893 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3894 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3896 "vmovq\t{$src, $dst|$dst, $src}", []>,
3897 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3898 } // ExeDomain = SSEPackedInt
// Assembler alias: the "vmovq.s" spelling maps to VMOVPQI2QIZrr (the 0xD6
// register-destination encoding). The trailing 0 is InstAlias's Emit argument,
// so the alias is accepted by the parser but not used when printing.
3900 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3901 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
// With AVX512 available, select the X86vextractstore64 node on a v2i64 source
// to the 0xD6 vmovq store form.
3903 let Predicates = [HasAVX512] in {
3904 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3905 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3908 // Move Scalar Single to Double Int
// Codegen-only vmovd (opcode 0x7E, register destination) used to select a
// bitconvert from FR32X to GR32.
3910 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3911 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3913 "vmovd\t{$src, $dst|$dst, $src}",
3914 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3915 EVEX, Sched<[WriteVecMoveToGpr]>;
3916 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3918 // Move Quadword Int to Packed Quadword Int
// Load form of vmovq in the XS-prefixed 0x7E encoding (AVX512XSI). It is
// selected for (v2i64 (scalar_to_vector (loadi64 addr))).
3920 let ExeDomain = SSEPackedInt in {
3921 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3923 "vmovq\t{$src, $dst|$dst, $src}",
3925 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3926 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3927 } // ExeDomain = SSEPackedInt
3929 // Allow "vmovd" but print "vmovq".
// Both 64-bit GPR<->XMM register moves accept the "vmovd" mnemonic on input.
// The trailing 0 (InstAlias Emit argument) keeps the alias out of the printer,
// so the instructions' own "vmovq" string is what gets printed.
3930 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3931 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3932 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3933 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3935 // Conversions between masks and scalar fp.
// Each bitconvert is lowered as two moves that pass through a general-purpose
// register: the codegen-only vmovd/vmovq defs convert between FR32X/FR64X and
// GR32/GR64, and KMOVD/KMOVQ convert between that GPR and the mask register.
// 32-bit masks (v32i1) pair with f32.
3936 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3937 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3938 def : Pat<(f32 (bitconvert VK32:$src)),
3939 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
// 64-bit masks (v64i1) pair with f64.
3941 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3942 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3943 def : Pat<(f64 (bitconvert VK64:$src)),
3944 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3946 //===----------------------------------------------------------------------===//
3947 // AVX-512 MOVSS, MOVSD
3948 //===----------------------------------------------------------------------===//
// Defines the full family of scalar-move records for one element type.
//   asm         - mnemonic prepended to every assembly string.
//   OpNode      - SDNode matched by the register-register forms.
//   vzload_frag - PatFrag matched by the plain load form (rm).
//   _           - X86VectorVTInfo supplying register classes, VT, memory
//                 operand, load fragment and execution domain.
// Opcode 0x10 is used for every form that writes a register and 0x11 for the
// two store forms.
3950 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3951 X86VectorVTInfo _> {
// Unmasked register form. The `let ... in` has no braces, so the OptForSize
// predicate applies to this def only.
3952 let Predicates = [HasAVX512, OptForSize] in
3953 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3954 (ins _.RC:$src1, _.RC:$src2),
3955 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3956 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3957 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Zero-masking register form (EVEX_KZ): an X86selects on $mask whose true
// operand is the OpNode result.
3958 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3959 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3960 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3961 "$dst {${mask}} {z}, $src1, $src2}"),
3962 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3963 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3965 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masking register form (EVEX_K): $src0 is tied to $dst and is the
// value X86selects yields when the mask does not choose the OpNode result.
3966 let Constraints = "$src0 = $dst" in
3967 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3968 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3969 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3970 "$dst {${mask}}, $src1, $src2}"),
3971 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3972 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3973 (_.VT _.RC:$src0))))],
3974 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Unmasked loads, both marked foldable and rematerializable. `rm` produces the
// vector register class from vzload_frag.
3975 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3976 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3977 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3978 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3979 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3980 // _alt version uses FR32/FR64 register class.
// It matches the plain scalar load fragment and is codegen-only.
3981 let isCodeGenOnly = 1 in
3982 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3983 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3984 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3985 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked loads. Their pattern lists are empty, so mayLoad/hasSideEffects are
// set explicitly; selection happens through separate Pat records.
3987 let mayLoad = 1, hasSideEffects = 0 in {
3988 let Constraints = "$src0 = $dst" in
3989 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3990 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3991 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3992 "$dst {${mask}}, $src}"),
3993 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3994 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3995 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3996 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3997 "$dst {${mask}} {z}, $src}"),
3998 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Unmasked scalar store from the FRC register class.
4000 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4001 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4002 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
4003 EVEX, Sched<[WriteFStore]>;
// Masked store: empty pattern list (mayStore set by hand); note the source is
// the vector class _.RC here, not _.FRC, and the mask is VK1WM.
4004 let mayStore = 1, hasSideEffects = 0 in
4005 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4006 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
4007 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4008 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiate the scalar-move family for f32 (vmovss, XS prefix, 32-bit
// compressed-displacement element) and f64 (vmovsd, XD prefix, VEX_W, 64-bit
// compressed-displacement element). Both are marked VEX_LIG.
4012 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
4013 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4015 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
4016 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Selection patterns for a scalar select that has been wrapped in
// scalar_to_vector and fed to OpNode as its second operand.
//   InstrStr - instruction name prefix; "rrk"/"rrkz" is appended and the
//              result looked up with !cast<Instruction>.
//   OpNode   - the move node being matched.
//   ZeroFP   - PatLeaf for the FP zero constant of the element type.
//   _        - X86VectorVTInfo for the 128-bit vector type.
4019 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4020 PatLeaf ZeroFP, X86VectorVTInfo _> {
// select(mask, src1, src2) -> merge-masked move. $src2 (the value chosen when
// the mask is clear) is copied into the vector class as the first operand and
// $src1 as the last operand.
4022 def : Pat<(_.VT (OpNode _.RC:$src0,
4023 (_.VT (scalar_to_vector
4024 (_.EltVT (X86selects VK1WM:$mask,
4025 (_.EltVT _.FRC:$src1),
4026 (_.EltVT _.FRC:$src2))))))),
4027 (!cast<Instruction>(InstrStr#rrk)
4028 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4031 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// select(mask, src1, 0.0) -> zero-masked move.
4033 def : Pat<(_.VT (OpNode _.RC:$src0,
4034 (_.VT (scalar_to_vector
4035 (_.EltVT (X86selects VK1WM:$mask,
4036 (_.EltVT _.FRC:$src1),
4037 (_.EltVT ZeroFP))))))),
4038 (!cast<Instruction>(InstrStr#rrkz)
4041 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Selects a 512-bit masked_store whose stored value is a 128-bit vector
// inserted at index 0 of an undef 512-bit vector, using the scalar masked
// store InstrStr#mrk.
//   Mask   - dag describing how the mask operand appears in the DAG; it is
//            expected to bind $mask.
//   MaskRC - register class of $mask, copied directly to VK1WM.
4044 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4045 dag Mask, RegisterClass MaskRC> {
4047 def : Pat<(masked_store
4048 (_.info512.VT (insert_subvector undef,
4049 (_.info128.VT _.info128.RC:$src),
4050 (iPTR 0))), addr:$dst, Mask),
4051 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4052 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4053 _.info128.RC:$src)>;
// Same source pattern as avx512_store_scalar_lowering, for masks held in a
// register class narrower than 32 bits: $mask is first placed into an i32
// built from IMPLICIT_DEF with INSERT_SUBREG at `subreg`, and that i32 is then
// copied to VK1WM.
4057 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4058 AVX512VLVectorVTInfo _,
4059 dag Mask, RegisterClass MaskRC,
4060 SubRegIndex subreg> {
4062 def : Pat<(masked_store
4063 (_.info512.VT (insert_subvector undef,
4064 (_.info128.VT _.info128.RC:$src),
4065 (iPTR 0))), addr:$dst, Mask),
4066 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4067 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4068 _.info128.RC:$src)>;
4072 // This matches the more recent codegen from clang that avoids emitting a 512
4073 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4074 // bits on AVX512F only targets.
// Two patterns, both producing InstrStr#mrk with the mask widened through an
// i32 INSERT_SUBREG: one for the widened 512-bit store (Mask512) and one for
// the native 128-bit masked store (Mask128).
4075 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4076 AVX512VLVectorVTInfo _,
4077 dag Mask512, dag Mask128,
4078 RegisterClass MaskRC,
4079 SubRegIndex subreg> {
4082 def : Pat<(masked_store
4083 (_.info512.VT (insert_subvector undef,
4084 (_.info128.VT _.info128.RC:$src),
4085 (iPTR 0))), addr:$dst, Mask512),
4086 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4087 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4088 _.info128.RC:$src)>;
4090 // AVX512VL pattern.
4091 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4092 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4093 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4094 _.info128.RC:$src)>;
// Selects the low 128 bits (extract_subvector) of a 512-bit masked_load using
// the scalar masked loads. Two pass-through shapes are matched:
//   - all-zeros pass-through            -> InstrStr#rmkz
//   - X86vzmovl of a 128-bit $src,
//     inserted into an undef 512-bit    -> InstrStr#rmk with $src as first
//                                          operand
// $mask (class MaskRC) is copied directly to VK1WM.
4097 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4098 dag Mask, RegisterClass MaskRC> {
4100 def : Pat<(_.info128.VT (extract_subvector
4101 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4102 _.info512.ImmAllZerosV)),
4104 (!cast<Instruction>(InstrStr#rmkz)
4105 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4108 def : Pat<(_.info128.VT (extract_subvector
4109 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4110 (_.info512.VT (insert_subvector undef,
4111 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4114 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4115 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Same two source patterns as avx512_load_scalar_lowering (zero pass-through
// -> rmkz, X86vzmovl pass-through -> rmk), for masks held in a narrower
// register class: $mask is inserted at `subreg` into an i32 IMPLICIT_DEF
// before being copied to VK1WM.
4120 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4121 AVX512VLVectorVTInfo _,
4122 dag Mask, RegisterClass MaskRC,
4123 SubRegIndex subreg> {
4125 def : Pat<(_.info128.VT (extract_subvector
4126 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4127 _.info512.ImmAllZerosV)),
4129 (!cast<Instruction>(InstrStr#rmkz)
4130 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4133 def : Pat<(_.info128.VT (extract_subvector
4134 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4135 (_.info512.VT (insert_subvector undef,
4136 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4139 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4140 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4145 // This matches the more recent codegen from clang that avoids emitting a 512
4146 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4147 // bits on AVX512F only targets.
// Four patterns: the widened 512-bit load (Mask512) and the native 128-bit
// load (Mask128), each with an all-zeros pass-through (-> rmkz) and an
// X86vzmovl pass-through (-> rmk). The mask always goes through an i32
// INSERT_SUBREG before the copy to VK1WM.
4148 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4149 AVX512VLVectorVTInfo _,
4150 dag Mask512, dag Mask128,
4151 RegisterClass MaskRC,
4152 SubRegIndex subreg> {
4153 // AVX512F patterns.
4154 def : Pat<(_.info128.VT (extract_subvector
4155 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4156 _.info512.ImmAllZerosV)),
4158 (!cast<Instruction>(InstrStr#rmkz)
4159 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4162 def : Pat<(_.info128.VT (extract_subvector
4163 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4164 (_.info512.VT (insert_subvector undef,
4165 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4168 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4169 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4172 // AVX512VL patterns.
4173 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4174 _.info128.ImmAllZerosV)),
4175 (!cast<Instruction>(InstrStr#rmkz)
4176 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4179 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4180 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4181 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4182 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiations of the scalar move/store/load lowering multiclasses for
// VMOVSSZ (f32) and VMOVSDZ (f64). Every mask dag isolates bit 0 of a GPR with
// (and $mask, 1) before reinterpreting it as a vXi1 mask; the variants differ
// in the GPR width the mask arrives in (GR32, GR16 or GR8) and in how the
// vXi1 value is widened or narrowed around the bitconvert.
4186 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4187 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
// Masked stores.
4189 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4190 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4191 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4192 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4193 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4194 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// Masked stores, 512-bit-widened and native 128-bit mask shapes (GR8 mask).
4196 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4197 (v16i1 (insert_subvector
4198 (v16i1 immAllZerosV),
4199 (v4i1 (extract_subvector
4200 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4203 (v4i1 (extract_subvector
4204 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4205 (iPTR 0))), GR8, sub_8bit>;
4206 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4211 (v16i1 immAllZerosV),
4212 (v2i1 (extract_subvector
4213 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4217 (v2i1 (extract_subvector
4218 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4219 (iPTR 0))), GR8, sub_8bit>;
// Masked loads, mirroring the store instantiations above.
4221 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4222 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4223 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4224 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4225 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4226 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4228 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4229 (v16i1 (insert_subvector
4230 (v16i1 immAllZerosV),
4231 (v4i1 (extract_subvector
4232 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4235 (v4i1 (extract_subvector
4236 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4237 (iPTR 0))), GR8, sub_8bit>;
4238 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4243 (v16i1 immAllZerosV),
4244 (v2i1 (extract_subvector
4245 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4249 (v2i1 (extract_subvector
4250 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4251 (iPTR 0))), GR8, sub_8bit>;
// Scalar f32/f64 X86selects lowered onto the masked vmovss/vmovsd forms. The
// scalar operands are copied into VR128X, the masked move is emitted, and the
// result is copied back to FR32X/FR64X.
//
// Register/register select -> rrk: the false value ($src2) becomes the tied
// pass-through operand, the middle vector source is IMPLICIT_DEF, and the true
// value ($src1) is the last source.
4253 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4254 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4255 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4256 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4257 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Select against FP zero -> rrkz (zero-masking), no pass-through needed.
4259 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4260 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4261 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
// Select of a loaded value -> rmk, with $src0 as the pass-through.
4263 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4265 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4266 VK1WM:$mask, addr:$src)),
// Select of a loaded value against FP zero -> rmkz.
4268 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4269 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
// The f64 patterns below follow the same four shapes using VMOVSDZ*.
4271 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4272 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4273 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4274 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4275 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4277 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4278 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4279 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4281 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4283 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4284 VK1WM:$mask, addr:$src)),
4286 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4287 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Alternate register-register encodings of vmovss/vmovsd using opcode 0x11
// with MRMDestReg, in unmasked, merge-masked (k) and zero-masked (kz) forms.
// They have no selection patterns and are codegen-only, but ForceDisassemble
// keeps them decodable. FoldGenData names the 0x10 instruction each one
// corresponds to.
4289 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4290 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4291 (ins VR128X:$src1, VR128X:$src2),
4292 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4293 []>, XS, EVEX_4V, VEX_LIG,
4294 FoldGenData<"VMOVSSZrr">,
4295 Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked form: $src0 is tied to $dst.
4297 let Constraints = "$src0 = $dst" in
4298 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4299 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4300 VR128X:$src1, VR128X:$src2),
4301 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4302 "$dst {${mask}}, $src1, $src2}",
4303 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4304 FoldGenData<"VMOVSSZrrk">,
4305 Sched<[SchedWriteFShuffle.XMM]>;
4307 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4308 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4309 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4310 "$dst {${mask}} {z}, $src1, $src2}",
4311 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4312 FoldGenData<"VMOVSSZrrkz">,
4313 Sched<[SchedWriteFShuffle.XMM]>;
// Double-precision counterparts (XD prefix, VEX_W).
4315 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4316 (ins VR128X:$src1, VR128X:$src2),
4317 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4318 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4319 FoldGenData<"VMOVSDZrr">,
4320 Sched<[SchedWriteFShuffle.XMM]>;
4322 let Constraints = "$src0 = $dst" in
4323 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4324 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4325 VR128X:$src1, VR128X:$src2),
4326 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4327 "$dst {${mask}}, $src1, $src2}",
4328 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4329 VEX_W, FoldGenData<"VMOVSDZrrk">,
4330 Sched<[SchedWriteFShuffle.XMM]>;
4332 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4333 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4335 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4336 "$dst {${mask}} {z}, $src1, $src2}",
4337 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4338 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4339 Sched<[SchedWriteFShuffle.XMM]>;
// Assembler aliases: the ".s"-suffixed mnemonics select the *_REV (opcode
// 0x11) encodings in their unmasked, masked and zero-masked forms. The
// trailing 0 (InstAlias Emit argument) makes them parse-only, so they are
// never chosen when printing.
4342 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4343 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4344 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4345 "$dst {${mask}}, $src1, $src2}",
4346 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4347 VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4349 "$dst {${mask}} {z}, $src1, $src2}",
4350 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4351 VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4353 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4354 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4355 "$dst {${mask}}, $src1, $src2}",
4356 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4357 VR128X:$src1, VR128X:$src2), 0>;
4358 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4359 "$dst {${mask}} {z}, $src1, $src2}",
4360 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4361 VR128X:$src1, VR128X:$src2), 0>;
// When optimizing for size, X86vzmovl on 32-bit-element vectors is selected as
// VMOVSSZrr with an AVX512_128_SET0 value as the first source. The 256- and
// 512-bit types operate on the sub_xmm subregister and wrap the result in
// SUBREG_TO_REG.
4363 let Predicates = [HasAVX512, OptForSize] in {
4364 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4365 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4366 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4367 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4369 // Move low f32 and clear high bits.
4370 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4371 (SUBREG_TO_REG (i32 0),
4372 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4373 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4374 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4375 (SUBREG_TO_REG (i32 0),
4376 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4377 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4379 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4380 (SUBREG_TO_REG (i32 0),
4381 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4382 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4383 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4384 (SUBREG_TO_REG (i32 0),
4385 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4386 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4389 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4390 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// Only the 512-bit types are handled here. The blend runs on the sub_xmm
// subregister against a V_SET0 value: VBLENDPSrri with immediate 1 for
// v16f32, VPBLENDWrri with immediate 3 for v16i32.
4391 let Predicates = [HasAVX512, OptForSpeed] in {
4392 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4393 (SUBREG_TO_REG (i32 0),
4394 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4395 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4396 (i8 1))), sub_xmm)>;
4397 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4398 (SUBREG_TO_REG (i32 0),
4399 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4400 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4401 (i8 3))), sub_xmm)>;
// Scalar FP loads into vector registers. A scalar_to_vector of a loaded
// f32/f64 selects the unmasked VMOVSSZrm/VMOVSDZrm load directly.
4404 let Predicates = [HasAVX512] in {
4405 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4406 (VMOVSSZrm addr:$src)>;
4407 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4408 (VMOVSDZrm addr:$src)>;
4410 // Represent the same patterns above but in the form they appear for 256-bit types.
// The 128-bit load result is placed in sub_xmm via SUBREG_TO_REG.
4412 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4413 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4414 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4415 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4417 // Represent the same patterns above but in the form they appear for 512-bit types.
4419 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4420 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4421 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4422 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// XMM -> XMM vmovq (XS-prefixed opcode 0x7E), selected for X86vzmovl on a
// v2i64 source.
4425 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4426 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4428 "vmovq\t{$src, $dst|$dst, $src}",
4429 [(set VR128X:$dst, (v2i64 (X86vzmovl
4430 (v2i64 VR128X:$src))))]>,
// Integer and 64-bit-element X86vzmovl/X86vzload patterns mapped onto the
// vmovd/vmovq instructions. Types wider than 128 bits reuse the 128-bit
// instruction on sub_xmm and wrap the result with SUBREG_TO_REG.
4434 let Predicates = [HasAVX512] in {
// vzmovl of a scalar_to_vector from a GPR is just the GPR -> XMM move.
4435 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4436 (VMOVDI2PDIZrr GR32:$src)>;
4438 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4439 (VMOV64toPQIZrr GR64:$src)>;
4441 // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
4442 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4443 (VMOVDI2PDIZrm addr:$src)>;
4444 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4445 (VMOVDI2PDIZrm addr:$src)>;
4446 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4447 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4448 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4449 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4450 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4451 (VMOVQI2PQIZrm addr:$src)>;
4452 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4453 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4455 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4456 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4457 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4458 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4459 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
// vzmovl on 256-/512-bit vectors with 64-bit elements: apply
// VMOVZPQILo2PQIZrr to the low XMM subregister.
4461 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4462 (SUBREG_TO_REG (i32 0),
4463 (v2f64 (VMOVZPQILo2PQIZrr
4464 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4466 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4467 (SUBREG_TO_REG (i32 0),
4468 (v2i64 (VMOVZPQILo2PQIZrr
4469 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4472 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4473 (SUBREG_TO_REG (i32 0),
4474 (v2f64 (VMOVZPQILo2PQIZrr
4475 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4477 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4478 (SUBREG_TO_REG (i32 0),
4479 (v2i64 (VMOVZPQILo2PQIZrr
4480 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4484 //===----------------------------------------------------------------------===//
4485 // AVX-512 - Non-temporals
4486 //===----------------------------------------------------------------------===//
// vmovntdqa load instructions (opcode 0x2A, T8PD) at 512, 256 and 128 bits.
// All three have empty pattern lists; they are selected through separate Pat
// records. The 256- and 128-bit forms are guarded by HasVLX.
4488 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4489 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4490 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4491 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4493 let Predicates = [HasVLX] in {
4494 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4496 "vmovntdqa\t{$src, $dst|$dst, $src}",
4497 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4498 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4500 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4502 "vmovntdqa\t{$src, $dst|$dst, $src}",
4503 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4504 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// Defines one non-temporal store instruction ("mr" form).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - X86VectorVTInfo giving the register class, VT, memory operand,
//               element size and execution domain.
//   Sched     - scheduling info; its MR member is used as SchedRW.
//   st_frag   - store fragment to match (defaults to alignednontemporalstore).
// AddedComplexity = 400 raises this pattern's priority during selection.
4507 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4508 X86SchedWriteMoveLS Sched,
4509 PatFrag st_frag = alignednontemporalstore> {
4510 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4511 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4512 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4513 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4514 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit) requires
// HasAVX512 only; Z256 and Z128 additionally require HasVLX. Each width picks
// the matching info* entry of VTInfo and the matching ZMM/YMM/XMM member of
// Sched.
4517 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4518 AVX512VLVectorVTInfo VTInfo,
4519 X86SchedWriteMoveLSWidths Sched> {
4520 let Predicates = [HasAVX512] in
4521 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4523 let Predicates = [HasAVX512, HasVLX] in {
4524 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4525 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// Non-temporal store instructions: vmovntdq on i64 vectors (opcode 0xE7),
// vmovntpd on f64 vectors and vmovntps on f32 vectors (both opcode 0x2B,
// distinguished by the PD+VEX_W vs. PS modifiers).
4529 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4530 SchedWriteVecMoveLSNT>, PD;
4531 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4532 SchedWriteFMoveLSNT>, PD, VEX_W;
4533 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4534 SchedWriteFMoveLSNT>, PS;
// 512-bit non-temporal patterns. VMOVNTDQ is instantiated on i64 vectors, so
// its own pattern covers v8i64 stores; the store patterns here add the other
// integer element types. VMOVNTDQAZrm has no pattern of its own, so every
// 512-bit type's aligned non-temporal load is mapped to it here.
4536 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4537 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4538 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4539 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4540 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4541 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4542 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4544 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4545 (VMOVNTDQAZrm addr:$src)>;
4546 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4547 (VMOVNTDQAZrm addr:$src)>;
4548 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4549 (VMOVNTDQAZrm addr:$src)>;
4550 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4551 (VMOVNTDQAZrm addr:$src)>;
4552 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4553 (VMOVNTDQAZrm addr:$src)>;
4554 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4555 (VMOVNTDQAZrm addr:$src)>;
// 256- and 128-bit non-temporal patterns, available with VLX. They follow the
// 512-bit scheme: stores of the non-i64 integer element types go to
// VMOVNTDQZ256mr/VMOVNTDQZ128mr, and aligned non-temporal loads of every
// vector type go to VMOVNTDQAZ256rm/VMOVNTDQAZ128rm.
4558 let Predicates = [HasVLX], AddedComplexity = 400 in {
// 256-bit stores.
4559 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4560 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4561 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4562 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4563 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4564 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
// 256-bit loads.
4566 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4567 (VMOVNTDQAZ256rm addr:$src)>;
4568 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4569 (VMOVNTDQAZ256rm addr:$src)>;
4570 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4571 (VMOVNTDQAZ256rm addr:$src)>;
4572 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4573 (VMOVNTDQAZ256rm addr:$src)>;
4574 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4575 (VMOVNTDQAZ256rm addr:$src)>;
4576 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4577 (VMOVNTDQAZ256rm addr:$src)>;
// 128-bit stores.
4579 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4580 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4581 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4582 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4583 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4584 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
// 128-bit loads.
4586 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4587 (VMOVNTDQAZ128rm addr:$src)>;
4588 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4589 (VMOVNTDQAZ128rm addr:$src)>;
4590 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4591 (VMOVNTDQAZ128rm addr:$src)>;
4592 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4593 (VMOVNTDQAZ128rm addr:$src)>;
4594 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4595 (VMOVNTDQAZ128rm addr:$src)>;
4596 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4597 (VMOVNTDQAZ128rm addr:$src)>;
4600 //===----------------------------------------------------------------------===//
4601 // AVX-512 - Integer arithmetic
// Maskable two-operand integer instruction in register (rr) and full-width
// memory (rm) forms, both built with AVX512_maskable.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - SDNode applied to ($src1, $src2).
//   _              - X86VectorVTInfo for the vector type.
//   sched          - scheduling class; the rm form uses its Folded and
//                    ReadAfterFold members.
//   IsCommutable   - forwarded to the rr form only; the rm form does not
//                    pass it on.
4603 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4604 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4605 bit IsCommutable = 0> {
4606 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4607 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4608 "$src2, $src1", "$src1, $src2",
4609 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4610 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
// Memory form: the second operand is loaded with the type's LdFrag.
4613 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4614 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4615 "$src2, $src1", "$src1, $src2",
4616 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4617 AVX512BIBase, EVEX_4V,
4618 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm (inherited, so rr and rm are defined too) with an
// "rmb" form whose second operand is a scalar memory location matched through
// the type's BroadcastLdFrag. The form is marked EVEX_B and the type's
// BroadcastStr is appended to the memory operand in the assembly string.
4621 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4622 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4623 bit IsCommutable = 0> :
4624 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4625 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4626 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4627 "${src2}"##_.BroadcastStr##", $src1",
4628 "$src1, ${src2}"##_.BroadcastStr,
4629 (_.VT (OpNode _.RC:$src1,
4630 (_.BroadcastLdFrag addr:$src2)))>,
4631 AVX512BIBase, EVEX_4V, EVEX_B,
4632 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_binop_rm (no broadcast form) at all three vector
// lengths. Z requires `prd`; Z256 and Z128 require `prd` plus HasVLX. Each
// width takes the matching info* entry of VTInfo and ZMM/YMM/XMM member of
// sched.
4635 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4636 AVX512VLVectorVTInfo VTInfo,
4637 X86SchedWriteWidths sched, Predicate prd,
4638 bit IsCommutable = 0> {
4639 let Predicates = [prd] in
4640 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4641 IsCommutable>, EVEX_V512;
4643 let Predicates = [prd, HasVLX] in {
4644 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4645 sched.YMM, IsCommutable>, EVEX_V256;
4646 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4647 sched.XMM, IsCommutable>, EVEX_V128;
// Same three-width instantiation as avx512_binop_rm_vl, but built on
// avx512_binop_rmb so every width also gets the broadcast (rmb) form.
4651 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4652 AVX512VLVectorVTInfo VTInfo,
4653 X86SchedWriteWidths sched, Predicate prd,
4654 bit IsCommutable = 0> {
4655 let Predicates = [prd] in
4656 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4657 IsCommutable>, EVEX_V512;
4659 let Predicates = [prd, HasVLX] in {
4660 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4661 sched.YMM, IsCommutable>, EVEX_V256;
4662 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4663 sched.XMM, IsCommutable>, EVEX_V128;
// 64-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info, tagged
// with VEX_W and EVEX_CD8<64, CD8VF>. Includes the broadcast-memory form.
4667 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668 X86SchedWriteWidths sched, Predicate prd,
4669 bit IsCommutable = 0> {
4670 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4671 sched, prd, IsCommutable>,
4672 VEX_W, EVEX_CD8<64, CD8VF>;
// 32-bit element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info, tagged
// with EVEX_CD8<32, CD8VF>. Includes the broadcast-memory form.
4675 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4676 X86SchedWriteWidths sched, Predicate prd,
4677 bit IsCommutable = 0> {
4678 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4679 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// 16-bit element wrapper: uses avx512_binop_rm_vl (rr/rm only, no broadcast
// form) over avx512vl_i16_info, tagged with EVEX_CD8<16, CD8VF>.
4682 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683 X86SchedWriteWidths sched, Predicate prd,
4684 bit IsCommutable = 0> {
4685 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4686 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// 8-bit element wrapper: uses avx512_binop_rm_vl (rr/rm only, no broadcast
// form) over avx512vl_i8_info, tagged with EVEX_CD8<8, CD8VF>.
4690 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4691 X86SchedWriteWidths sched, Predicate prd,
4692 bit IsCommutable = 0> {
4693 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4694 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Defines the Q and D variants of one mnemonic. The Q variant uses opc_q and
// appends the q suffix to OpcodeStr; the D variant uses opc_d and appends d.
4698 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4699 SDNode OpNode, X86SchedWriteWidths sched,
4700 Predicate prd, bit IsCommutable = 0> {
4701 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4704 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
// Defines the W and B variants of one mnemonic. The W variant uses opc_w and
// appends the w suffix to OpcodeStr; the B variant uses opc_b and appends b.
4708 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4709 SDNode OpNode, X86SchedWriteWidths sched,
4710 Predicate prd, bit IsCommutable = 0> {
4711 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4714 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
// All four element sizes of one mnemonic: the D/Q variants are guarded by
// HasAVX512 and the B/W variants by HasBWI. Opcode arguments are given in
// b, w, d, q order.
4718 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4719 bits<8> opc_d, bits<8> opc_q,
4720 string OpcodeStr, SDNode OpNode,
4721 X86SchedWriteWidths sched,
4722 bit IsCommutable = 0> {
4723 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4724 sched, HasAVX512, IsCommutable>,
4725 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4726 sched, HasBWI, IsCommutable>;
// Binary op whose source (_Src), destination (_Dst) and broadcast (_Brdct)
// vector types are given separately. Emits rr, rm and rmb forms; masking is
// done on the destination type. In the rmb pattern the broadcast load is
// produced as _Brdct.VT and bitconverted before being fed to OpNode.
4729 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4730 X86FoldableSchedWrite sched,
4731 SDNode OpNode,X86VectorVTInfo _Src,
4732 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4733 bit IsCommutable = 0> {
4734 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4735 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4736 "$src2, $src1","$src1, $src2",
4738 (_Src.VT _Src.RC:$src1),
4739 (_Src.VT _Src.RC:$src2))),
4741 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4742 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4744 "$src2, $src1", "$src1, $src2",
4745 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4746 (_Src.LdFrag addr:$src2)))>,
4747 AVX512BIBase, EVEX_4V,
4748 Sched<[sched.Folded, sched.ReadAfterFold]>;
4750 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4751 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4753 "${src2}"##_Brdct.BroadcastStr##", $src1",
4754 "$src1, ${src2}"##_Brdct.BroadcastStr,
4755 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4756 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4757 AVX512BIBase, EVEX_4V, EVEX_B,
4758 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer add/sub, saturating add/sub, multiply and average instantiations.
// The final template argument of each multiclass is IsCommutable: it is 1 for
// the add, multiply and average forms below and 0 for the subtract forms.
// Entries followed by T8PD pick that opcode-map modifier; VPMULLQ is also
// marked NotEVEX2VEXConvertible.
4761 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4762 SchedWriteVecALU, 1>;
4763 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4764 SchedWriteVecALU, 0>;
4765 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4766 SchedWriteVecALU, HasBWI, 1>;
4767 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4768 SchedWriteVecALU, HasBWI, 0>;
4769 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4770 SchedWriteVecALU, HasBWI, 1>;
4771 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4772 SchedWriteVecALU, HasBWI, 0>;
4773 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4774 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4775 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4776 SchedWriteVecIMul, HasBWI, 1>;
4777 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4778 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4779 NotEVEX2VEXConvertible;
4780 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4782 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4784 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4785 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4786 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4787 SchedWriteVecALU, HasBWI, 1>;
4788 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4789 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4790 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4791 SchedWriteVecIMul, HasAVX512, 1>;
// Instantiates avx512_binop_rm2 at 512/256/128 bits with separate source and
// destination type families. The broadcast type is fixed to 64-bit elements
// (v8i64_info / v4i64x_info / v2i64x_info) and every form carries VEX_W and
// EVEX_CD8<64, CD8VF>. The 256/128-bit forms additionally require HasVLX.
4793 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4794 X86SchedWriteWidths sched,
4795 AVX512VLVectorVTInfo _SrcVTInfo,
4796 AVX512VLVectorVTInfo _DstVTInfo,
4797 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4798 let Predicates = [prd] in
4799 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4800 _SrcVTInfo.info512, _DstVTInfo.info512,
4801 v8i64_info, IsCommutable>,
4802 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4803 let Predicates = [HasVLX, prd] in {
4804 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4805 _SrcVTInfo.info256, _DstVTInfo.info256,
4806 v4i64x_info, IsCommutable>,
4807 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4808 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4809 _SrcVTInfo.info128, _DstVTInfo.info128,
4810 v2i64x_info, IsCommutable>,
4811 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB: byte vectors for both source and destination, matched via
// X86multishift, guarded by HasVBMI and instantiated as non-commutable (0).
4815 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4816 avx512vl_i8_info, avx512vl_i8_info,
4817 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory (rmb) form for ops whose source and destination vector
// types differ. The broadcast uses the source type: the scalar memory operand,
// broadcast string, load fragment and EVEX_CD8 element size all come from _Src.
4819 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4820 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4821 X86FoldableSchedWrite sched> {
4822 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4823 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4825 "${src2}"##_Src.BroadcastStr##", $src1",
4826 "$src1, ${src2}"##_Src.BroadcastStr,
4827 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4828 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4829 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4830 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register-register (rr) and register-memory (rm) forms for ops whose source
// and destination vector types differ. Operands use _Src, the result and
// masking use _Dst, and EVEX_CD8 is derived from the source element size.
// IsCommutable is forwarded to the rr form only.
4833 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4834 SDNode OpNode,X86VectorVTInfo _Src,
4835 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4836 bit IsCommutable = 0> {
4837 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4838 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4839 "$src2, $src1","$src1, $src2",
4841 (_Src.VT _Src.RC:$src1),
4842 (_Src.VT _Src.RC:$src2))),
4843 IsCommutable, IsCommutable>,
4844 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4845 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4846 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4847 "$src2, $src1", "$src1, $src2",
4848 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4849 (_Src.LdFrag addr:$src2)))>,
4850 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4851 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pack from 32-bit to 16-bit elements at three vector lengths. Each length
// combines avx512_packs_rm (rr/rm) with avx512_packs_rmb (broadcast form) and
// uses the SchedWriteShuffle class of matching width. The 512-bit form needs
// HasBWI; the 256/128-bit forms need HasBWI and HasVLX.
4854 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4856 let Predicates = [HasBWI] in
4857 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4858 v32i16_info, SchedWriteShuffle.ZMM>,
4859 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4860 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4861 let Predicates = [HasBWI, HasVLX] in {
4862 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4863 v16i16x_info, SchedWriteShuffle.YMM>,
4864 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4865 v16i16x_info, SchedWriteShuffle.YMM>,
4867 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4868 v8i16x_info, SchedWriteShuffle.XMM>,
4869 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4870 v8i16x_info, SchedWriteShuffle.XMM>,
// Pack from 16-bit to 8-bit elements at three vector lengths. Only
// avx512_packs_rm is used here, so no broadcast-memory form is defined.
// The 512-bit form needs HasBWI; the 256/128-bit forms need HasBWI and HasVLX.
4874 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4876 let Predicates = [HasBWI] in
4877 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4878 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4879 let Predicates = [HasBWI, HasVLX] in {
4880 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4881 v32i8x_info, SchedWriteShuffle.YMM>,
4883 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4884 v16i8x_info, SchedWriteShuffle.XMM>,
// Multiply-add style op with distinct source and destination type families.
// Reuses avx512_packs_rm at 512/256/128 bits with the SchedWriteVecIMul class
// of matching width. The 512-bit form needs HasBWI; the 256/128-bit forms need
// HasBWI and HasVLX. No broadcast-memory form is defined.
4889 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4890 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4891 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4892 let Predicates = [HasBWI] in
4893 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4894 _Dst.info512, SchedWriteVecIMul.ZMM,
4895 IsCommutable>, EVEX_V512;
4896 let Predicates = [HasBWI, HasVLX] in {
4897 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4898 _Dst.info256, SchedWriteVecIMul.YMM,
4899 IsCommutable>, EVEX_V256;
4900 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4901 _Dst.info128, SchedWriteVecIMul.XMM,
4902 IsCommutable>, EVEX_V128;
// Pack, multiply-add and integer min/max instantiations.
//  - VPMADDUBSW takes the default IsCommutable (0); VPMADDWD passes 1.
//  - All min/max forms are instantiated as commutable (1).
//  - B/W min/max are guarded by HasBWI, D/Q min/max by HasAVX512.
//  - The 64-bit element min/max forms are marked NotEVEX2VEXConvertible.
4906 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4907 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4908 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4909 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4911 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4912 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4913 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4914 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
4916 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4917 SchedWriteVecALU, HasBWI, 1>, T8PD;
4918 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4919 SchedWriteVecALU, HasBWI, 1>;
4920 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4921 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4922 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4923 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4924 NotEVEX2VEXConvertible;
4926 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4927 SchedWriteVecALU, HasBWI, 1>;
4928 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4929 SchedWriteVecALU, HasBWI, 1>, T8PD;
4930 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4931 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4932 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4933 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4934 NotEVEX2VEXConvertible;
4936 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4937 SchedWriteVecALU, HasBWI, 1>, T8PD;
4938 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4939 SchedWriteVecALU, HasBWI, 1>;
4940 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4941 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4942 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4943 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4944 NotEVEX2VEXConvertible;
4946 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4947 SchedWriteVecALU, HasBWI, 1>;
4948 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4949 SchedWriteVecALU, HasBWI, 1>, T8PD;
4950 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4951 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4952 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4953 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4954 NotEVEX2VEXConvertible;
4956 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Active when DQI is available but VLX is not. Each pattern matches a v4i64 or
// v2i64 mul (register operand or 64-bit broadcast load) and places the narrow
// register operands into an undefined v8i64 register via INSERT_SUBREG
// (sub_ymm for 256-bit, sub_xmm for 128-bit).
4957 let Predicates = [HasDQI, NoVLX] in {
4958 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4961 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4962 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4964 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4967 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4971 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4974 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4975 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4977 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4980 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Patterns that select v4i64 / v2i64 OpNode onto the instruction whose name is
// Instr plus an rr or rmb suffix. Register operands are inserted into an
// undefined v8i64 register via INSERT_SUBREG (sub_ymm / sub_xmm); the rmb
// variants match a 64-bit broadcast load as the second operand.
4985 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4986 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4988 (!cast<Instruction>(Instr#"rr")
4989 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4990 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4992 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4994 (!cast<Instruction>(Instr#"rmb")
4995 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4999 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
5001 (!cast<Instruction>(Instr#"rr")
5002 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5003 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
5005 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
5007 (!cast<Instruction>(Instr#"rmb")
5008 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Without VLX, route the 128/256-bit 64-bit-element unsigned and signed
// min/max nodes to the 512-bit (Z-suffixed) instructions.
5013 let Predicates = [HasAVX512, NoVLX] in {
5014 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5015 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5016 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5017 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5020 //===----------------------------------------------------------------------===//
5021 // AVX-512 Logical Instructions
5022 //===----------------------------------------------------------------------===//
// Bitwise logic ops are defined only in D and Q element forms; the same opcode
// is passed for both. and/or/xor are commutable (1). VPANDN omits the
// IsCommutable argument and therefore takes the multiclass default of 0.
5024 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5025 SchedWriteVecLogic, HasAVX512, 1>;
5026 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5027 SchedWriteVecLogic, HasAVX512, 1>;
5028 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5029 SchedWriteVecLogic, HasAVX512, 1>;
5030 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5031 SchedWriteVecLogic, HasAVX512>;
// With VLX: select byte and word vector logic ops (v16i8, v8i16, v32i8,
// v16i16) onto the Q-element instruction forms, since the logic instructions
// above exist only in D and Q forms. Covers and/or/xor/X86andnp with both a
// register second operand (rr) and a full-vector load second operand (rm),
// first for 128-bit and then for 256-bit registers.
5033 let Predicates = [HasVLX] in {
5034 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5035 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5036 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5037 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5039 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5040 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5041 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5042 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5044 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5045 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5046 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5047 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5049 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5050 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5051 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5052 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5054 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5055 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5056 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5057 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5059 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5060 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5061 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5062 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5064 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5065 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5066 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5067 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5069 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5070 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5071 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5072 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5074 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5075 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5076 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5077 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5079 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5080 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5081 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5082 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5084 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5085 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5086 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5087 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5089 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5090 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5091 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5092 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5094 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5095 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5096 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5097 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5099 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5100 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5101 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5102 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5104 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5105 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5106 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5107 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5109 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5110 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5111 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5112 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// 512-bit counterpart of the byte/word logic patterns: v64i8 and v32i16
// and/or/xor/X86andnp are selected onto the Q-element 512-bit instructions,
// for both register (rr) and full-vector load (rm) second operands.
5115 let Predicates = [HasAVX512] in {
5116 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5117 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5118 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5119 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5121 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5122 (VPORQZrr VR512:$src1, VR512:$src2)>;
5123 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5124 (VPORQZrr VR512:$src1, VR512:$src2)>;
5126 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5127 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5128 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5129 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5131 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5132 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5133 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5134 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5136 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5137 (VPANDQZrm VR512:$src1, addr:$src2)>;
5138 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5139 (VPANDQZrm VR512:$src1, addr:$src2)>;
5141 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5142 (VPORQZrm VR512:$src1, addr:$src2)>;
5143 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5144 (VPORQZrm VR512:$src1, addr:$src2)>;
5146 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5147 (VPXORQZrm VR512:$src1, addr:$src2)>;
5148 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5149 (VPXORQZrm VR512:$src1, addr:$src2)>;
5151 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5152 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5153 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5154 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5157 // Patterns to catch vselect with different type than logic op.
// `_` describes the vselect (result and mask) type and IntInfo the type the
// logic op was performed in; the op result is bitconverted to _.VT before the
// vselect. The patterns select the masked instruction forms named InstrStr
// plus rrk / rrkz (register operand) and rmk / rmkz (load operand).
5158 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5160 X86VectorVTInfo IntInfo> {
5161 // Masked register-register logical operations.
5162 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5163 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5165 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5166 _.RC:$src1, _.RC:$src2)>;
5168 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5169 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5171 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5174 // Masked register-memory logical operations.
5175 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5176 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5177 (load addr:$src2)))),
5179 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5180 _.RC:$src1, addr:$src2)>;
5181 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5182 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5183 (load addr:$src2)))),
5185 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
// Broadcast counterpart of avx512_logical_lowering: the second logic operand
// is a broadcast load in the IntInfo type. The patterns select the masked
// broadcast instruction forms named InstrStr plus rmbk / rmbkz.
5189 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5191 X86VectorVTInfo IntInfo> {
5192 // Register-broadcast logical operations.
5193 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5195 (IntInfo.VT (OpNode _.RC:$src1,
5196 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5198 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5199 _.RC:$src1, addr:$src2)>;
5200 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5202 (IntInfo.VT (OpNode _.RC:$src1,
5203 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5205 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5206 _.RC:$src1, addr:$src2)>;
// Expands avx512_logical_lowering over the three vector lengths, appending
// Z128 / Z256 / Z to InstrStr. The 128/256-bit patterns are guarded by HasVLX
// and the 512-bit patterns by HasAVX512.
5209 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5210 AVX512VLVectorVTInfo SelectInfo,
5211 AVX512VLVectorVTInfo IntInfo> {
5212 let Predicates = [HasVLX] in {
5213 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5215 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5218 let Predicates = [HasAVX512] in {
5219 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
// Expands avx512_logical_lowering_bcast over the three vector lengths,
// appending Z128 / Z256 / Z to InstrStr. The 128/256-bit patterns are guarded
// by HasVLX and the 512-bit patterns by HasAVX512.
5224 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5225 AVX512VLVectorVTInfo SelectInfo,
5226 AVX512VLVectorVTInfo IntInfo> {
5227 let Predicates = [HasVLX] in {
5228 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5229 SelectInfo.info128, IntInfo.info128>;
5230 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5231 SelectInfo.info256, IntInfo.info256>;
5233 let Predicates = [HasAVX512] in {
5234 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5235 SelectInfo.info512, IntInfo.info512>;
// Instantiates the vselect/logic-op lowering patterns for every combination
// of select type and logic-op type listed below. The instruction element
// suffix follows the select type: Q for i64 and f64 selects, D for i32 and
// f32 selects. The last two entries add the broadcast-load variants.
5239 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5240 // i64 vselect with i32/i16/i8 logic op
5241 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5243 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5245 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5248 // i32 vselect with i64/i16/i8 logic op
5249 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5251 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5253 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5256 // f32 vselect with i64/i32/i16/i8 logic op
5257 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5259 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5261 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5263 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5266 // f64 vselect with i64/i32/i16/i8 logic op
5267 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5269 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5271 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5273 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5276 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5279 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
// Apply the mixed-type vselect lowering to each of the four logic
// instructions defined above.
5284 defm : avx512_logical_lowering_types<"VPAND", and>;
5285 defm : avx512_logical_lowering_types<"VPOR", or>;
5286 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5287 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5289 //===----------------------------------------------------------------------===//
5290 // AVX-512 FP arithmetic
5291 //===----------------------------------------------------------------------===//
// Scalar FP binary op.
//  - rr_Int / rm_Int: maskable forms on the vector register class _.RC,
//    matched through VecNode; rm_Int takes its memory operand via
//    _.ScalarIntMemCPat.
//  - rr / rm: isCodeGenOnly forms on the scalar register class _.FRC, matched
//    through OpNode and guarded by HasAVX512. IsCommutable is applied to rr.
// The outer let sets the execution domain, declares MXCSR as a use and sets
// mayRaiseFPException for the definitions it encloses.
5293 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5294 SDNode OpNode, SDNode VecNode,
5295 X86FoldableSchedWrite sched, bit IsCommutable> {
5296 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5297 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5298 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5299 "$src2, $src1", "$src1, $src2",
5300 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5303 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5304 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5305 "$src2, $src1", "$src1, $src2",
5306 (_.VT (VecNode _.RC:$src1,
5307 _.ScalarIntMemCPat:$src2))>,
5308 Sched<[sched.Folded, sched.ReadAfterFold]>;
5309 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5310 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5311 (ins _.FRC:$src1, _.FRC:$src2),
5312 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5313 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5315 let isCommutable = IsCommutable;
5317 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5318 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5319 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5320 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5321 (_.ScalarLdFrag addr:$src2)))]>,
5322 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar FP op with an explicit rounding-control operand. Defines only the
// register form rrb_Int, which takes AVX512RC:$rc as a third input, is matched
// through VecNode and is encoded with EVEX_B and EVEX_RC. The let declares
// MXCSR as a use.
5327 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5328 SDNode VecNode, X86FoldableSchedWrite sched,
5329 bit IsCommutable = 0> {
5330 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5331 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5332 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5333 "$rc, $src2, $src1", "$src1, $src2, $rc",
5334 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5336 EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP binary op with a suppress-all-exceptions variant.
//  - rr_Int / rm_Int: maskable forms on _.RC matched through VecNode and
//    tagged SIMD_EXC.
//  - rr / rm: isCodeGenOnly forms on _.FRC matched through OpNode, guarded by
//    HasAVX512, with MXCSR use and mayRaiseFPException set. Each carries an
//    EVEX2VEXOverride built from EVEX2VexOvrd plus the rr / rm suffix.
//  - rrb_Int: register form printed with {sae}, matched through SaeNode and
//    encoded with EVEX_B; it declares MXCSR as a use.
5338 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5339 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5340 X86FoldableSchedWrite sched, bit IsCommutable,
5341 string EVEX2VexOvrd> {
5342 let ExeDomain = _.ExeDomain in {
5343 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5344 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5345 "$src2, $src1", "$src1, $src2",
5346 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5347 Sched<[sched]>, SIMD_EXC;
5349 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5350 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5351 "$src2, $src1", "$src1, $src2",
5352 (_.VT (VecNode _.RC:$src1,
5353 _.ScalarIntMemCPat:$src2))>,
5354 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5356 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5357 Uses = [MXCSR], mayRaiseFPException = 1 in {
5358 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5359 (ins _.FRC:$src1, _.FRC:$src2),
5360 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5363 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5364 let isCommutable = IsCommutable;
5366 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5367 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5368 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5369 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5370 (_.ScalarLdFrag addr:$src2)))]>,
5371 Sched<[sched.Folded, sched.ReadAfterFold]>,
5372 EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
5375 let Uses = [MXCSR] in
5376 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5377 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5378 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5379 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5380 EVEX_B, Sched<[sched]>;
// Scalar single (SSZ) and double (SDZ) precision binary op with rounding
// control. Each combines avx512_fp_scalar with avx512_fp_scalar_round and
// appends ss / sd to OpcodeStr. SSZ uses f32x_info, the XS prefix and
// EVEX_CD8<32, CD8VT1>; SDZ uses f64x_info, the XD prefix, VEX_W and
// EVEX_CD8<64, CD8VT1>.
5384 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5385 SDNode VecNode, SDNode RndNode,
5386 X86SchedWriteSizes sched, bit IsCommutable> {
5387 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5388 sched.PS.Scl, IsCommutable>,
5389 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5390 sched.PS.Scl, IsCommutable>,
5391 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5392 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5393 sched.PD.Scl, IsCommutable>,
5394 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5395 sched.PD.Scl, IsCommutable>,
5396 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar single (SSZ) and double (SDZ) precision binary op built on
// avx512_fp_scalar_sae. Appends ss / sd to OpcodeStr; SSZ uses f32x_info with
// the XS prefix and EVEX_CD8<32, CD8VT1>, SDZ uses f64x_info with the XD
// prefix, VEX_W and EVEX_CD8<64, CD8VT1>.
5399 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5400 SDNode VecNode, SDNode SaeNode,
5401 X86SchedWriteSizes sched, bit IsCommutable> {
5402 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5403 VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5405 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5406 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5407 VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5409 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP arithmetic instantiations. Add and mul are commutable (1); sub and
// div are not (0). Min and max use the SAE variant and are instantiated as
// non-commutable (0).
5411 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5412 SchedWriteFAddSizes, 1>;
5413 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5414 SchedWriteFMulSizes, 1>;
5415 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5416 SchedWriteFAddSizes, 0>;
5417 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5418 SchedWriteFDivSizes, 0>;
5419 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5420 SchedWriteFCmpSizes, 0>;
5421 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5422 SchedWriteFCmpSizes, 0>;
5424 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5425 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Defines isCodeGenOnly scalar rr / rm forms on _.FRC, guarded by HasAVX512.
// The rr form is unconditionally marked commutable. Each form carries an
// EVEX2VEXOverride built from EVEX2VEXOvrd plus the rr / rm suffix.
5426 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5427 X86VectorVTInfo _, SDNode OpNode,
5428 X86FoldableSchedWrite sched,
5429 string EVEX2VEXOvrd> {
5430 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5431 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5432 (ins _.FRC:$src1, _.FRC:$src2),
5433 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5434 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5435 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5436 let isCommutable = 1;
5438 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5439 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5440 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5441 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5442 (_.ScalarLdFrag addr:$src2)))]>,
5443 Sched<[sched.Folded, sched.ReadAfterFold]>,
5444 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Commutable scalar min/max. These reuse the opcodes and mnemonics of the
// scalar min/max instructions (0x5D / 0x5F, vminss / vminsd / vmaxss /
// vmaxsd) but are matched through X86fminc / X86fmaxc. All are tagged
// SIMD_EXC; the double-precision forms additionally carry VEX_W.
5447 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5448 SchedWriteFCmp.Scl, "VMINCSS">, XS,
5449 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5451 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5452 SchedWriteFCmp.Scl, "VMINCSD">, XD,
5453 VEX_W, EVEX_4V, VEX_LIG,
5454 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5456 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5457 SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5458 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5460 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5461 SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5462 VEX_W, EVEX_4V, VEX_LIG,
5463 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5465 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5466 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5468 bit IsKCommutable = IsCommutable> {
5469 let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5470 Uses = [MXCSR], mayRaiseFPException = 1 in {
5471 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5472 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5473 "$src2, $src1", "$src1, $src2",
5474 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5475 IsKCommutable, IsKCommutable>,
5476 EVEX_4V, Sched<[sched]>;
5477 let mayLoad = 1 in {
5478 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5480 "$src2, $src1", "$src1, $src2",
5481 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5482 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5483 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5484 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5485 "${src2}"##_.BroadcastStr##", $src1",
5486 "$src1, ${src2}"##_.BroadcastStr,
5487 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5489 Sched<[sched.Folded, sched.ReadAfterFold]>;
5494 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5495 SDPatternOperator OpNodeRnd,
5496 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5497 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5498 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5499 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5500 "$rc, $src2, $src1", "$src1, $src2, $rc",
5501 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5502 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
5505 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5506 SDPatternOperator OpNodeSAE,
5507 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5508 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5509 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5510 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5511 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5512 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5513 EVEX_4V, EVEX_B, Sched<[sched]>;
5516 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5517 Predicate prd, X86SchedWriteSizes sched,
5518 bit IsCommutable = 0,
5519 bit IsPD128Commutable = IsCommutable> {
5520 let Predicates = [prd] in {
5521 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5522 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5523 EVEX_CD8<32, CD8VF>;
5524 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5525 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5526 EVEX_CD8<64, CD8VF>;
5529 // Define only if AVX512VL feature is present.
5530 let Predicates = [prd, HasVLX] in {
5531 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5532 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5533 EVEX_CD8<32, CD8VF>;
5534 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5535 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5536 EVEX_CD8<32, CD8VF>;
5537 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5538 sched.PD.XMM, IsPD128Commutable,
5539 IsCommutable>, EVEX_V128, PD, VEX_W,
5540 EVEX_CD8<64, CD8VF>;
5541 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5542 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5543 EVEX_CD8<64, CD8VF>;
5547 let Uses = [MXCSR] in
5548 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5549 X86SchedWriteSizes sched> {
5550 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5552 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5553 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5555 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
5558 let Uses = [MXCSR] in
5559 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5560 X86SchedWriteSizes sched> {
5561 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5563 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5564 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5566 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP binary operations. Each defm combines the register/memory/
// broadcast forms from avx512_fp_binop_p with extra 512-bit-only forms:
// avx512_fp_binop_p_round (explicit rounding-control operand) for the
// arithmetic ops, avx512_fp_binop_p_sae ({sae}) for min/max.
// vadd and vmul pass IsCommutable = 1; vsub and vdiv leave it at its default
// of 0; vmin and vmax pass 0 explicitly.
5569 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, HasAVX512,
5570 SchedWriteFAddSizes, 1>,
5571 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5572 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, HasAVX512,
5573 SchedWriteFMulSizes, 1>,
5574 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5575 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, HasAVX512,
5576 SchedWriteFAddSizes>,
5577 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5578 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, HasAVX512,
5579 SchedWriteFDivSizes>,
5580 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5581 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5582 SchedWriteFCmpSizes, 0>,
5583 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5584 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5585 SchedWriteFCmpSizes, 0>,
5586 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
5587 let isCodeGenOnly = 1 in {
5588 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5589 SchedWriteFCmpSizes, 1>;
5590 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5591 SchedWriteFCmpSizes, 1>;
5593 let Uses = []<Register>, mayRaiseFPException = 0 in {
5594 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5595 SchedWriteFLogicSizes, 1>;
5596 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5597 SchedWriteFLogicSizes, 0>;
5598 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5599 SchedWriteFLogicSizes, 1>;
5600 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5601 SchedWriteFLogicSizes, 1>;
5604 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5605 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5606 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5607 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5609 "$src2, $src1", "$src1, $src2",
5610 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5611 EVEX_4V, Sched<[sched]>;
5612 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5613 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5614 "$src2, $src1", "$src1, $src2",
5615 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5616 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5617 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5618 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5619 "${src2}"##_.BroadcastStr##", $src1",
5620 "$src1, ${src2}"##_.BroadcastStr,
5621 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5622 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
5626 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5627 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5628 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5629 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5630 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5631 "$src2, $src1", "$src1, $src2",
5632 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5634 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5635 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5636 "$src2, $src1", "$src1, $src2",
5637 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2)>,
5638 Sched<[sched.Folded, sched.ReadAfterFold]>;
5642 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5643 X86SchedWriteWidths sched> {
5644 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5645 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5646 EVEX_V512, EVEX_CD8<32, CD8VF>;
5647 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5648 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5649 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5650 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5651 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info,
5652 X86scalefsRnd, sched.Scl>,
5653 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5654 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5655 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info,
5656 X86scalefsRnd, sched.Scl>,
5657 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5659 // Define only if AVX512VL feature is present.
5660 let Predicates = [HasVLX] in {
5661 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5662 EVEX_V128, EVEX_CD8<32, CD8VF>;
5663 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5664 EVEX_V256, EVEX_CD8<32, CD8VF>;
5665 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5666 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5667 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5668 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF: opcode 0x2C is used for the packed forms and 0x2D for the scalar
// (SS/SD) forms, matching the opc / opcScaler parameters of
// avx512_fp_scalef_all. All forms are marked NotEVEX2VEXConvertible.
5671 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5672 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5674 //===----------------------------------------------------------------------===//
5675 // AVX-512 VPTESTM instructions
5676 //===----------------------------------------------------------------------===//
5678 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5679 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5681 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5682 // There are just too many permutations due to commutability and bitcasts.
5683 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5684 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5685 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5686 "$src2, $src1", "$src1, $src2",
5687 (null_frag), (null_frag), 1>,
5688 EVEX_4V, Sched<[sched]>;
5690 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5691 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5692 "$src2, $src1", "$src1, $src2",
5693 (null_frag), (null_frag)>,
5694 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5695 Sched<[sched.Folded, sched.ReadAfterFold]>;
5699 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5700 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5701 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5702 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5703 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5704 "${src2}"##_.BroadcastStr##", $src1",
5705 "$src1, ${src2}"##_.BroadcastStr,
5706 (null_frag), (null_frag)>,
5707 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5708 Sched<[sched.Folded, sched.ReadAfterFold]>;
5711 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5712 X86SchedWriteWidths sched,
5713 AVX512VLVectorVTInfo _> {
5714 let Predicates = [HasAVX512] in
5715 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5716 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5718 let Predicates = [HasAVX512, HasVLX] in {
5719 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5720 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5721 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5722 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
5726 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5727 X86SchedWriteWidths sched> {
5728 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5730 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5731 avx512vl_i64_info>, VEX_W;
5734 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5735 X86SchedWriteWidths sched> {
5736 let Predicates = [HasBWI] in {
5737 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5738 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5739 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5740 v64i8_info, NAME#"B">, EVEX_V512;
5742 let Predicates = [HasVLX, HasBWI] in {
5744 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5745 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5746 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5747 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5748 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5749 v32i8x_info, NAME#"B">, EVEX_V256;
5750 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5751 v16i8x_info, NAME#"B">, EVEX_V128;
5755 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5756 X86SchedWriteWidths sched> :
5757 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5758 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM and VPTESTNM share their opcodes (0x26 for the byte/word forms,
// 0x27 for the dword/qword forms) and the same scheduling class; the two
// instructions are distinguished only by prefix class (T8PD vs. T8XS).
5760 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5761 SchedWriteVecLogic>, T8PD;
5762 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5763 SchedWriteVecLogic>, T8XS;
5765 //===----------------------------------------------------------------------===//
5766 // AVX-512 Shift instructions
5767 //===----------------------------------------------------------------------===//
5769 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5770 string OpcodeStr, SDNode OpNode,
5771 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5772 let ExeDomain = _.ExeDomain in {
5773 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5774 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5775 "$src2, $src1", "$src1, $src2",
5776 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5778 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5779 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5780 "$src2, $src1", "$src1, $src2",
5781 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5783 Sched<[sched.Folded]>;
5787 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5788 string OpcodeStr, SDNode OpNode,
5789 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5790 let ExeDomain = _.ExeDomain in
5791 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5792 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5793 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5794 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5795 EVEX_B, Sched<[sched.Folded]>;
5798 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5799 X86FoldableSchedWrite sched, ValueType SrcVT,
5800 X86VectorVTInfo _> {
5801 // src2 is always 128-bit
5802 let ExeDomain = _.ExeDomain in {
5803 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5804 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5805 "$src2, $src1", "$src1, $src2",
5806 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5807 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5808 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5809 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5810 "$src2, $src1", "$src1, $src2",
5811 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5813 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5817 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5818 X86SchedWriteWidths sched, ValueType SrcVT,
5819 AVX512VLVectorVTInfo VTInfo,
5821 let Predicates = [prd] in
5822 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5823 VTInfo.info512>, EVEX_V512,
5824 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5825 let Predicates = [prd, HasVLX] in {
5826 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5827 VTInfo.info256>, EVEX_V256,
5828 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5829 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5830 VTInfo.info128>, EVEX_V128,
5831 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
5835 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5836 string OpcodeStr, SDNode OpNode,
5837 X86SchedWriteWidths sched,
5838 bit NotEVEX2VEXConvertibleQ = 0> {
5839 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5840 avx512vl_i32_info, HasAVX512>;
5841 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5842 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5843 avx512vl_i64_info, HasAVX512>, VEX_W;
5844 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5845 avx512vl_i16_info, HasBWI>;
5848 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5849 string OpcodeStr, SDNode OpNode,
5850 X86SchedWriteWidths sched,
5851 AVX512VLVectorVTInfo VTInfo> {
5852 let Predicates = [HasAVX512] in
5853 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5854 sched.ZMM, VTInfo.info512>,
5855 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5856 VTInfo.info512>, EVEX_V512;
5857 let Predicates = [HasAVX512, HasVLX] in {
5858 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5859 sched.YMM, VTInfo.info256>,
5860 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5861 VTInfo.info256>, EVEX_V256;
5862 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5863 sched.XMM, VTInfo.info128>,
5864 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5865 VTInfo.info128>, EVEX_V128;
5869 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5870 string OpcodeStr, SDNode OpNode,
5871 X86SchedWriteWidths sched> {
5872 let Predicates = [HasBWI] in
5873 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5875 let Predicates = [HasVLX, HasBWI] in {
5876 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5877 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5878 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5879 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
5883 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5884 Format ImmFormR, Format ImmFormM,
5885 string OpcodeStr, SDNode OpNode,
5886 X86SchedWriteWidths sched,
5887 bit NotEVEX2VEXConvertibleQ = 0> {
5888 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5889 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5890 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5891 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5892 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Shift / rotate by an 8-bit immediate.
// avx512_shift_rmi_dq takes the D and Q opcodes as its first two arguments;
// the word forms come from avx512_shift_rmi_w with opcode 0x71. The
// operation is selected by the MRMnr/MRMnm format pair:
//   MRM2 = vpsrl, MRM6 = vpsll, MRM4 = vpsra, MRM0 = vpror, MRM1 = vprol.
// VPSRA passes NotEVEX2VEXConvertibleQ = 1, so its Q forms are defined under
// notEVEX2VEXConvertible. The rotates are instantiated for D/Q only.
5895 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5896 SchedWriteVecShiftImm>,
5897 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5898 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5900 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5901 SchedWriteVecShiftImm>,
5902 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5903 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5905 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5906 SchedWriteVecShiftImm, 1>,
5907 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5908 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5910 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5911 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5912 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5913 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Shift by a count supplied in a 128-bit operand (avx512_shift_rrm uses
// VR128X / i128mem for src2). The three opcode arguments of
// avx512_shift_types are given in D, Q, W order. VPSRA passes
// NotEVEX2VEXConvertibleQ = 1 for its Q forms.
5915 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5916 SchedWriteVecShift>;
5917 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5918 SchedWriteVecShift, 1>;
5919 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5920 SchedWriteVecShift>;
5922 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
5923 let Predicates = [HasAVX512, NoVLX] in {
5924 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5925 (EXTRACT_SUBREG (v8i64
5927 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5928 VR128X:$src2)), sub_ymm)>;
5930 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5931 (EXTRACT_SUBREG (v8i64
5933 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5934 VR128X:$src2)), sub_xmm)>;
5936 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5937 (EXTRACT_SUBREG (v8i64
5939 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5940 timm:$src2)), sub_ymm)>;
5942 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5943 (EXTRACT_SUBREG (v8i64
5945 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5946 timm:$src2)), sub_xmm)>;
5949 //===-------------------------------------------------------------------===//
5950 // Variable Bit Shifts
5951 //===-------------------------------------------------------------------===//
5953 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5954 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5955 let ExeDomain = _.ExeDomain in {
5956 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5957 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5958 "$src2, $src1", "$src1, $src2",
5959 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5960 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5961 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5962 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5963 "$src2, $src1", "$src1, $src2",
5964 (_.VT (OpNode _.RC:$src1,
5965 (_.VT (_.LdFrag addr:$src2))))>,
5966 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5967 Sched<[sched.Folded, sched.ReadAfterFold]>;
5971 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5972 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5973 let ExeDomain = _.ExeDomain in
5974 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5975 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5976 "${src2}"##_.BroadcastStr##", $src1",
5977 "$src1, ${src2}"##_.BroadcastStr,
5978 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5979 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5980 Sched<[sched.Folded, sched.ReadAfterFold]>;
5983 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5984 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5985 let Predicates = [HasAVX512] in
5986 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5987 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5989 let Predicates = [HasAVX512, HasVLX] in {
5990 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5991 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5992 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5993 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5997 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5998 SDNode OpNode, X86SchedWriteWidths sched> {
5999 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6001 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6002 avx512vl_i64_info>, VEX_W;
6005 // Use 512bit version to implement 128/256 bit in case NoVLX.
6006 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6007 SDNode OpNode, list<Predicate> p> {
6008 let Predicates = p in {
6009 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6010 (_.info256.VT _.info256.RC:$src2))),
6012 (!cast<Instruction>(OpcodeStr#"Zrr")
6013 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6014 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6017 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6018 (_.info128.VT _.info128.RC:$src2))),
6020 (!cast<Instruction>(OpcodeStr#"Zrr")
6021 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6022 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
6026 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6027 SDNode OpNode, X86SchedWriteWidths sched> {
6028 let Predicates = [HasBWI] in
6029 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6031 let Predicates = [HasVLX, HasBWI] in {
6033 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6035 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Per-element variable shifts and rotates. avx512_var_shift_types supplies
// the D and Q forms; the shifts additionally get word forms from
// avx512_var_shift_w (predicated on HasBWI) under a separate opcode. The
// variable rotates are matched from the generic rotr / rotl nodes and are
// instantiated for D/Q only.
6040 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6041 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6043 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6044 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6046 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6047 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6049 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6050 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
// Without VLX, select the 128/256-bit forms of these shifts through the
// 512-bit register-register instruction: avx512_var_shift_lowering appends
// "Zrr" to the given name and widens both operands with INSERT_SUBREG.
// The quadword arithmetic shift needs only AVX512; the word shifts need BWI.
6052 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6053 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6054 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6055 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6058 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6059 let Predicates = [HasAVX512, NoVLX] in {
6060 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6061 (EXTRACT_SUBREG (v8i64
6063 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6064 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6066 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6067 (EXTRACT_SUBREG (v8i64
6069 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6070 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6073 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6074 (EXTRACT_SUBREG (v16i32
6076 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6077 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6079 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6080 (EXTRACT_SUBREG (v16i32
6082 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6083 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6086 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6087 (EXTRACT_SUBREG (v8i64
6089 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6090 timm:$src2)), sub_xmm)>;
6091 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6092 (EXTRACT_SUBREG (v8i64
6094 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6095 timm:$src2)), sub_ymm)>;
6097 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6098 (EXTRACT_SUBREG (v16i32
6100 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6101 timm:$src2)), sub_xmm)>;
6102 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6103 (EXTRACT_SUBREG (v16i32
6105 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6106 timm:$src2)), sub_ymm)>;
6109 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
6110 let Predicates = [HasAVX512, NoVLX] in {
6111 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6112 (EXTRACT_SUBREG (v8i64
6114 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6115 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6117 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6118 (EXTRACT_SUBREG (v8i64
6120 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6121 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6124 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6125 (EXTRACT_SUBREG (v16i32
6127 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6128 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6130 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6131 (EXTRACT_SUBREG (v16i32
6133 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6134 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6137 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6138 (EXTRACT_SUBREG (v8i64
6140 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6141 timm:$src2)), sub_xmm)>;
6142 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6143 (EXTRACT_SUBREG (v8i64
6145 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6146 timm:$src2)), sub_ymm)>;
6148 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6149 (EXTRACT_SUBREG (v16i32
6151 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6152 timm:$src2)), sub_xmm)>;
6153 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6154 (EXTRACT_SUBREG (v16i32
6156 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6157 timm:$src2)), sub_ymm)>;
6160 //===-------------------------------------------------------------------===//
6161 // 1-src variable permutation VPERMW/D/Q
6162 //===-------------------------------------------------------------------===//
6164 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6165 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6166 let Predicates = [HasAVX512] in
6167 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6168 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6170 let Predicates = [HasAVX512, HasVLX] in
6171 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6172 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
6175 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6176 string OpcodeStr, SDNode OpNode,
6177 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6178 let Predicates = [HasAVX512] in
6179 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6180 sched, VTInfo.info512>,
6181 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6182 sched, VTInfo.info512>, EVEX_V512;
6183 let Predicates = [HasAVX512, HasVLX] in
6184 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6185 sched, VTInfo.info256>,
6186 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6187 sched, VTInfo.info256>, EVEX_V256;
6190 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6191 Predicate prd, SDNode OpNode,
6192 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6193 let Predicates = [prd] in
6194 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6196 let Predicates = [HasVLX, prd] in {
6197 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6199 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// Variable (vector-controlled) permutes, all matched from X86VPermv.
// VPERMW / VPERMB go through avx512_vperm_bw (Z, Z256 and Z128 forms) and
// are gated on HasBWI / HasVBMI respectively. VPERMD/Q/PS/PD go through
// avx512_vperm_dq_sizes, which defines only the Z and Z256 forms.
// Within each pair sharing an opcode (W/B, D/Q, PS/PD) the wider-element
// variant adds VEX_W.
6204 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6205 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6206 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6207 WriteVarShuffle256, avx512vl_i8_info>;
6209 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6210 WriteVarShuffle256, avx512vl_i32_info>;
6211 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6212 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6213 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6214 WriteFVarShuffle256, avx512vl_f32_info>;
6215 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6216 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6218 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6219 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6220 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6221 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6222 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6223 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6225 //===----------------------------------------------------------------------===//
6226 // AVX-512 - VPERMIL
6227 //===----------------------------------------------------------------------===//
// Variable-control VPERMIL forms for one vector width. `_` describes the data
// vector and `Ctrl` describes the control vector ($src2).
//   rr  : control comes from a register of class Ctrl.RC.
//   rm  : control is a full-vector memory operand read through Ctrl.LdFrag.
//   rmb : control is a scalar memory operand read through
//         Ctrl.BroadcastLdFrag; the record is tagged EVEX_B.
// All three are built on AVX512_maskable and tagged T8PD, EVEX_4V.
6229 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6230 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6231 X86VectorVTInfo Ctrl> {
6232 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6233 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6234 "$src2, $src1", "$src1, $src2",
6235 (_.VT (OpNode _.RC:$src1,
6236 (Ctrl.VT Ctrl.RC:$src2)))>,
6237 T8PD, EVEX_4V, Sched<[sched]>;
6238 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6239 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6240 "$src2, $src1", "$src1, $src2",
6243 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6244 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6245 Sched<[sched.Folded, sched.ReadAfterFold]>;
6246 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6247 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6248 "${src2}"##_.BroadcastStr##", $src1",
6249 "$src1, ${src2}"##_.BroadcastStr,
6252 (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6253 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6254 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands avx512_permil_vec (with X86VPermilpv as the node) at all three
// vector widths, picking the ZMM/XMM/YMM entry of `sched` for each.
//   Z           : requires HasAVX512.
//   Z128 / Z256 : require HasAVX512 and HasVLX.
6257 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6258 X86SchedWriteWidths sched,
6259 AVX512VLVectorVTInfo _,
6260 AVX512VLVectorVTInfo Ctrl> {
6261 let Predicates = [HasAVX512] in {
6262 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6263 _.info512, Ctrl.info512>, EVEX_V512;
6265 let Predicates = [HasAVX512, HasVLX] in {
6266 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6267 _.info128, Ctrl.info128>, EVEX_V128;
6268 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6269 _.info256, Ctrl.info256>, EVEX_V256;
// Top-level VPERMIL multiclass. Emits two families under the same NAME:
//   * variable-control forms (opcode OpcVar) via avx512_permil_vec_common;
//   * X86VPermilpi forms (opcode OpcImm) via avx512_shift_rmi_sizes, tagged
//     AVX512AIi8Base.
6273 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6274 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6275 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6277 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6278 X86VPermilpi, SchedWriteFShuffle, _>,
6279 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS is placed in the packed-single execution domain (OpcImm 0x04,
// OpcVar 0x0C) and VPERMILPD in the packed-double domain (OpcImm 0x05,
// OpcVar 0x0D). VPERMILPD uses avx512vl_i64_info as its control-vector
// description and carries VEX_W1X.
6282 let ExeDomain = SSEPackedSingle in
6283 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6285 let ExeDomain = SSEPackedDouble in
6286 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6287 avx512vl_i64_info>, VEX_W1X;
6289 //===----------------------------------------------------------------------===//
6290 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6291 //===----------------------------------------------------------------------===//
// All three shuffles use opcode 0x70 and are told apart by their base class:
// AVX512BIi8Base (VPSHUFD), AVX512XSIi8Base (VPSHUFHW) and AVX512XDIi8Base
// (VPSHUFLW). The word forms go through avx512_shift_rmi_w; the record names
// are VPSHUFH / VPSHUFL while the mnemonics are "vpshufhw" / "vpshuflw".
6293 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6294 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6295 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6296 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6297 X86PShufhw, SchedWriteShuffle>,
6298 EVEX, AVX512XSIi8Base;
6299 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6300 X86PShuflw, SchedWriteShuffle>,
6301 EVEX, AVX512XDIi8Base;
6303 //===----------------------------------------------------------------------===//
6304 // AVX-512 - VPSHUFB
6305 //===----------------------------------------------------------------------===//
// Byte-shuffle forms at the three vector widths, built on avx512_var_shift
// with fixed i8 vector infos (v64i8_info, v32i8x_info, v16i8x_info).
//   Z           : requires HasBWI.
//   Z256 / Z128 : require HasVLX and HasBWI.
6307 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6308 X86SchedWriteWidths sched> {
6309 let Predicates = [HasBWI] in
6310 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6313 let Predicates = [HasVLX, HasBWI] in {
6314 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6316 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
// VPSHUFB: opcode 0x00, selected through X86pshufb, marked VEX_WIG.
6321 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6322 SchedWriteVarShuffle>, VEX_WIG;
6324 //===----------------------------------------------------------------------===//
6325 // Move Low to High and High to Low packed FP Instructions
6326 //===----------------------------------------------------------------------===//
// Register-register VMOVLHPS (opcode 0x16), selected for X86Movlhps on v4f32.
6328 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6329 (ins VR128X:$src1, VR128X:$src2),
6330 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6331 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6332 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// Register-register VMOVHLPS (opcode 0x12), selected for X86Movhlps on v4f32.
// Unlike VMOVLHPSZrr it is flagged isCommutable and NotMemoryFoldable.
6333 let isCommutable = 1 in
6334 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6335 (ins VR128X:$src1, VR128X:$src2),
6336 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6337 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6338 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6340 //===----------------------------------------------------------------------===//
6341 // VMOVHPS/PD VMOVLPS Instructions
6342 // All patterns were taken from the SSE implementation.
6343 //===----------------------------------------------------------------------===//
// Load form (rm) shared by VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD: combines a vector
// register ($src1) with a 64-bit memory operand ($src2, f64mem). The record is
// marked mayLoad with no side effects and takes its execution domain from `_`.
// OpNode is an SDPatternOperator so callers may pass null_frag.
6345 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6346 SDPatternOperator OpNode,
6347 X86VectorVTInfo _> {
6348 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6349 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6350 (ins _.RC:$src1, f64mem:$src2),
6351 !strconcat(OpcodeStr,
6352 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6356 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6357 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
// No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
// SSE1. And MOVLPS pattern is even more complex.
// Accordingly the PS variants pass null_frag as the node, while the PD
// variants pass X86Unpckl (VMOVHPD) and X86Movsd (VMOVLPD). High-half forms
// use opcode 0x16 and low-half forms use opcode 0x12.
6362 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6363 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6364 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6365 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6366 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6367 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6368 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6369 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Extra selection patterns (HasAVX512) that fold a 64-bit load into the
// VMOVHPD / VMOVLPD load forms.
6371 let Predicates = [HasAVX512] in {
// X86Unpckl whose second operand is an i64 scalar load bitcast to v2f64.
6373 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6374 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6375 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// X86Unpckl whose second operand is an X86vzload64.
6376 def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6377 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// X86Movsd whose second operand is an X86vzload64.
6380 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6381 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD, all scheduled as
// WriteFStore. High-half stores use opcode 0x17, low-half stores opcode 0x13.
// The PS variants have empty pattern lists, so mayStore / hasSideEffects are
// set explicitly on them; the PD variants carry store patterns instead.
6384 let SchedRW = [WriteFStore] in {
6385 let mayStore = 1, hasSideEffects = 0 in
6386 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6387 (ins f64mem:$dst, VR128X:$src),
6388 "vmovhps\t{$src, $dst|$dst, $src}",
6389 []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of (X86Unpckh $src, $src).
6390 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6391 (ins f64mem:$dst, VR128X:$src),
6392 "vmovhpd\t{$src, $dst|$dst, $src}",
6393 [(store (f64 (extractelt
6394 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6395 (iPTR 0))), addr:$dst)]>,
6396 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6397 let mayStore = 1, hasSideEffects = 0 in
6398 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6399 (ins f64mem:$dst, VR128X:$src),
6400 "vmovlps\t{$src, $dst|$dst, $src}",
6401 []>, EVEX, EVEX_CD8<32, CD8VT2>;
// Matches a store of element 0 of $src.
6402 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6403 (ins f64mem:$dst, VR128X:$src),
6404 "vmovlpd\t{$src, $dst|$dst, $src}",
6405 [(store (f64 (extractelt (v2f64 VR128X:$src),
6406 (iPTR 0))), addr:$dst)]>,
6407 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Additional VMOVHPD store pattern (HasAVX512): a store of element 0 of
// (X86VPermilpi $src, 1) is also selected to VMOVHPDZ128mr.
6410 let Predicates = [HasAVX512] in {
6412 def : Pat<(store (f64 (extractelt
6413 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6414 (iPTR 0))), addr:$dst),
6415 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6417 //===----------------------------------------------------------------------===//
6418 // FMA - Fused Multiply Operations
// Packed FMA, "213" operand form, for one vector width. $src1 is tied to $dst
// and the selection pattern is OpNode($src2, $src1, $src3).
//   r  : all operands in registers.
//   m  : $src3 is a full-vector memory operand (_.LdFrag).
//   mb : $src3 is a scalar memory operand read through _.BroadcastLdFrag;
//        the record is tagged EVEX_B.
// Every record uses MXCSR and may raise FP exceptions.
6421 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6422 X86FoldableSchedWrite sched,
6423 X86VectorVTInfo _, string Suff> {
6424 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6425 Uses = [MXCSR], mayRaiseFPException = 1 in {
6426 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6427 (ins _.RC:$src2, _.RC:$src3),
6428 OpcodeStr, "$src3, $src2", "$src2, $src3",
6429 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6430 AVX512FMA3Base, Sched<[sched]>;
6432 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6433 (ins _.RC:$src2, _.MemOp:$src3),
6434 OpcodeStr, "$src3, $src2", "$src2, $src3",
6435 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6436 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6438 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6439 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6440 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6441 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6443 _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6444 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, "213" form, register-only variant with an explicit rounding
// operand (AVX512RC:$rc). The pattern is OpNode($src2, $src1, $src3, $rc) and
// the record is tagged EVEX_B, EVEX_RC.
6448 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6449 X86FoldableSchedWrite sched,
6450 X86VectorVTInfo _, string Suff> {
6451 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6453 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6454 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6455 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6456 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6457 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Expands the "213" packed FMA at all three vector widths.
//   Z           : requires HasAVX512; combines the r/m/mb forms with the
//                 rounding form (OpNodeRnd).
//   Z256 / Z128 : require HasVLX and HasAVX512; only avx512_fma3p_213_rm is
//                 used for these widths.
6460 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6461 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6462 AVX512VLVectorVTInfo _, string Suff> {
6463 let Predicates = [HasAVX512] in {
6464 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6466 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6468 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6470 let Predicates = [HasVLX, HasAVX512] in {
6471 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6473 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6474 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6476 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Emits the PS (f32) and PD (f64) families of a "213" packed FMA by appending
// "ps" / "pd" to OpcodeStr; both use SchedWriteFMA.
6480 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6482 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6483 SchedWriteFMA, avx512vl_f32_info, "PS">;
6484 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6485 SchedWriteFMA, avx512vl_f64_info, "PD">,
// "213" packed FMA instantiations. Each pairs the regular node with its
// rounding node; only VFMADD213 uses an "any" node (X86any_Fmadd).
6489 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
6490 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6491 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6492 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6493 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6494 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, "231" operand form, for one vector width. $src1 is tied to $dst
// and the selection pattern is OpNode($src2, $src3, $src1), i.e. the tied
// operand is the last node operand.
//   r  : all operands in registers.
//   m  : $src3 is a full-vector memory operand (_.LdFrag).
//   mb : $src3 is a scalar memory operand read through _.BroadcastLdFrag;
//        the record is tagged EVEX_B.
// Every record uses MXCSR and may raise FP exceptions.
6497 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6498 X86FoldableSchedWrite sched,
6499 X86VectorVTInfo _, string Suff> {
6500 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6501 Uses = [MXCSR], mayRaiseFPException = 1 in {
6502 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6503 (ins _.RC:$src2, _.RC:$src3),
6504 OpcodeStr, "$src3, $src2", "$src2, $src3",
6505 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6506 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
6508 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6509 (ins _.RC:$src2, _.MemOp:$src3),
6510 OpcodeStr, "$src3, $src2", "$src2, $src3",
6511 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6512 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6514 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6515 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6516 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6517 "$src2, ${src3}"##_.BroadcastStr,
6518 (_.VT (OpNode _.RC:$src2,
6519 (_.VT (_.BroadcastLdFrag addr:$src3)),
6520 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6521 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, "231" form, register-only variant with an explicit rounding
// operand (AVX512RC:$rc). The pattern is OpNode($src2, $src3, $src1, $rc) and
// the record is tagged EVEX_B, EVEX_RC.
6525 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6526 X86FoldableSchedWrite sched,
6527 X86VectorVTInfo _, string Suff> {
6528 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6530 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6531 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6532 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6533 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6535 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Expands the "231" packed FMA at all three vector widths.
//   Z           : requires HasAVX512; combines the r/m/mb forms with the
//                 rounding form (OpNodeRnd).
//   Z256 / Z128 : require HasVLX and HasAVX512; only avx512_fma3p_231_rm is
//                 used for these widths.
6538 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6539 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6540 AVX512VLVectorVTInfo _, string Suff> {
6541 let Predicates = [HasAVX512] in {
6542 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6544 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6546 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6548 let Predicates = [HasVLX, HasAVX512] in {
6549 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
6551 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6552 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
6554 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Emits the PS (f32) and PD (f64) families of a "231" packed FMA by appending
// "ps" / "pd" to OpcodeStr; both use SchedWriteFMA.
6558 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6559 SDNode OpNodeRnd > {
6560 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6561 SchedWriteFMA, avx512vl_f32_info, "PS">;
6562 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6563 SchedWriteFMA, avx512vl_f64_info, "PD">,
// "231" packed FMA instantiations. Each opcode is the matching "213" opcode
// plus 0x10 (e.g. 0xA8 -> 0xB8 for VFMADD).
6567 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
6568 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6569 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6570 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6571 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6572 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, "132" operand form, for one vector width. $src1 is tied to $dst.
//   r  : register pattern OpNode($src1, $src3, $src2).
//   m  : $src3 is a full-vector memory operand; the pattern is written as
//        OpNode(load $src3, $src1, $src2).
//   mb : same as m, but $src3 is read through _.BroadcastLdFrag and the
//        record is tagged EVEX_B.
// Every record uses MXCSR and may raise FP exceptions.
6574 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6575 X86FoldableSchedWrite sched,
6576 X86VectorVTInfo _, string Suff> {
6577 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6578 Uses = [MXCSR], mayRaiseFPException = 1 in {
6579 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6580 (ins _.RC:$src2, _.RC:$src3),
6581 OpcodeStr, "$src3, $src2", "$src2, $src3",
6582 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
6583 AVX512FMA3Base, Sched<[sched]>;
6585 // Pattern is 312 order so that the load is in a different place from the
6586 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6587 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6588 (ins _.RC:$src2, _.MemOp:$src3),
6589 OpcodeStr, "$src3, $src2", "$src2, $src3",
6590 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6591 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6593 // Pattern is 312 order so that the load is in a different place from the
6594 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6595 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6596 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6597 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6598 "$src2, ${src3}"##_.BroadcastStr,
6599 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6600 _.RC:$src1, _.RC:$src2)), 1, 0>,
6601 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FMA, "132" form, register-only variant with an explicit rounding
// operand (AVX512RC:$rc). The pattern is OpNode($src1, $src3, $src2, $rc) and
// the record is tagged EVEX_B, EVEX_RC.
6605 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6606 X86FoldableSchedWrite sched,
6607 X86VectorVTInfo _, string Suff> {
6608 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6610 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6611 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6612 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6613 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6615 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Expands the "132" packed FMA at all three vector widths.
//   Z           : requires HasAVX512; combines the r/m/mb forms with the
//                 rounding form (OpNodeRnd).
//   Z256 / Z128 : require HasVLX and HasAVX512; only avx512_fma3p_132_rm is
//                 used for these widths.
6618 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6619 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6620 AVX512VLVectorVTInfo _, string Suff> {
6621 let Predicates = [HasAVX512] in {
6622 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6624 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6626 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6628 let Predicates = [HasVLX, HasAVX512] in {
6629 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
6631 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6632 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
6634 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Emits the PS (f32) and PD (f64) families of a "132" packed FMA by appending
// "ps" / "pd" to OpcodeStr; both use SchedWriteFMA.
6638 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6639 SDNode OpNodeRnd > {
6640 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6641 SchedWriteFMA, avx512vl_f32_info, "PS">;
6642 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6643 SchedWriteFMA, avx512vl_f64_info, "PD">,
// "132" packed FMA instantiations. Each opcode is the matching "213" opcode
// minus 0x10 (e.g. 0xA8 -> 0x98 for VFMADD).
6647 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
6648 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6649 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6650 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6651 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6652 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA records for one operand form. $src1 is tied to $dst throughout.
// Two groups are produced:
//   * r_Int / m_Int / rb_Int : maskable records on the vector register class
//     (_.RC). They are declared with null_frag, i.e. without a selection
//     pattern of their own.
//   * r / m / rb : isCodeGenOnly, isCommutable records on the scalar register
//     class (_.FRC), carrying the patterns RHS_r / RHS_m / RHS_b supplied by
//     the caller.
// MaskOnlyReg: when set, r and rb get an empty pattern list; m always keeps
// RHS_m.
// The rb_Int / rb records take an AVX512RC:$rc operand and declare MXCSR use
// explicitly; the other records carry SIMD_EXC instead.
6655 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6656 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6657 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6658 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6659 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6660 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6661 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6664 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6665 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6666 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6667 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6669 let Uses = [MXCSR] in
6670 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6671 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6672 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6673 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6675 let isCodeGenOnly = 1, isCommutable = 1 in {
6676 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6677 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6678 !strconcat(OpcodeStr,
6679 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6680 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6681 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6682 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6683 !strconcat(OpcodeStr,
6684 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6685 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6687 let Uses = [MXCSR] in
6688 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6689 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6690 !strconcat(OpcodeStr,
6691 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6692 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6693 Sched<[SchedWriteFMA.Scl]>;
6694 }// isCodeGenOnly = 1
6695 }// Constraints = "$src1 = $dst"
// Emits the 213, 231 and 132 scalar FMA families for one element type. Record
// names are NAME # <form> # SUFF # "Z"; mnemonics are OpcodeStr # <form> #
// _.Suffix. Each family passes three scalar patterns (register, memory,
// rounding) to avx512_fma3s_common.
// The final argument is MaskOnlyReg: 0 for the 213 family, 1 for the 231 and
// 132 families.
6698 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6699 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
6700 X86VectorVTInfo _, string SUFF> {
6701 let ExeDomain = _.ExeDomain in {
6702 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6703 // Operands for intrinsic are in 123 order to preserve passthru
6705 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6707 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6708 (_.ScalarLdFrag addr:$src3)))),
6709 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6710 _.FRC:$src3, (i32 timm:$rc)))), 0>;
6712 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6713 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6715 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6716 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6717 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6718 _.FRC:$src1, (i32 timm:$rc)))), 1>;
6720 // One pattern is 312 order so that the load is in a different place from the
6721 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6722 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6723 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6725 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6726 _.FRC:$src1, _.FRC:$src2))),
6727 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6728 _.FRC:$src2, (i32 timm:$rc)))), 1>;
// Emits the single-precision ("SS", f32x_info) and double-precision ("SD",
// f64x_info) scalar FMA families under HasAVX512. Both are VEX_LIG; the SD
// family additionally carries VEX_W and uses EVEX_CD8<64, CD8VT1>.
6732 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6733 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6734 let Predicates = [HasAVX512] in {
6735 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6736 OpNodeRnd, f32x_info, "SS">,
6737 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6738 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6739 OpNodeRnd, f64x_info, "SD">,
6740 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar FMA instantiations. The three opcodes are given in 213, 231, 132
// order, matching the parameter order of avx512_fma3s.
6744 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
6745 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
6746 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
6747 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
// Selection patterns that map a scalar FMA whose result is inserted into
// element 0 of $src1 (via Move + scalar_to_vector) onto the *_Int scalar FMA
// instructions. The target record is looked up by name as
// Prefix # <form> # Suffix # "Z" # <variant>.
//   Op     : FMA node used by the non-rounding patterns.
//   RndOp  : FMA node used by the patterns that carry a rounding operand.
//   Move   : node that merges the scalar result into $src1.
//   ZeroFP : zero constant used as the false value of the zero-masked forms.
// Scalar register operands are copied into VR128X with COPY_TO_REGCLASS
// because the *_Int instructions take vector-register operands.
6749 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
6750 string Suffix, SDNode Move,
6751 X86VectorVTInfo _, PatLeaf ZeroFP> {
6752 let Predicates = [HasAVX512] in {
// --- Unmasked, register operands (Zr_Int) ---
6753 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6755 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6757 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6758 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6759 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6761 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6762 (Op _.FRC:$src2, _.FRC:$src3,
6763 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6764 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6765 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6766 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
// --- Unmasked, $src3 loaded from memory (Zm_Int) ---
6768 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6770 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6771 (_.ScalarLdFrag addr:$src3)))))),
6772 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6773 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6776 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6777 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6778 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6779 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6780 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6783 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6784 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6785 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6786 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6787 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
// --- Masked (Intk): X86selects on VK1WM:$mask whose false value is element 0
// of $src1 ---
6790 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6791 (X86selects VK1WM:$mask,
6793 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6795 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6796 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6797 VR128X:$src1, VK1WM:$mask,
6798 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6801 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6802 (X86selects VK1WM:$mask,
6804 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6805 (_.ScalarLdFrag addr:$src3)),
6806 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6807 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6808 VR128X:$src1, VK1WM:$mask,
6809 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6811 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6812 (X86selects VK1WM:$mask,
6813 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6814 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6815 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6816 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6817 VR128X:$src1, VK1WM:$mask,
6818 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6820 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6821 (X86selects VK1WM:$mask,
6822 (Op _.FRC:$src2, _.FRC:$src3,
6823 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6824 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6825 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6826 VR128X:$src1, VK1WM:$mask,
6827 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6828 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6830 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6831 (X86selects VK1WM:$mask,
6832 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6833 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6834 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6835 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6836 VR128X:$src1, VK1WM:$mask,
6837 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// --- Zero-masked (Intkz): X86selects on VK1WM:$mask whose false value is
// ZeroFP ---
6839 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6840 (X86selects VK1WM:$mask,
6842 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6844 (_.EltVT ZeroFP)))))),
6845 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6846 VR128X:$src1, VK1WM:$mask,
6847 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6848 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6850 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6851 (X86selects VK1WM:$mask,
6852 (Op _.FRC:$src2, _.FRC:$src3,
6853 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6854 (_.EltVT ZeroFP)))))),
6855 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6856 VR128X:$src1, VK1WM:$mask,
6857 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6858 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6860 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6861 (X86selects VK1WM:$mask,
6863 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6864 (_.ScalarLdFrag addr:$src3)),
6865 (_.EltVT ZeroFP)))))),
6866 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6867 VR128X:$src1, VK1WM:$mask,
6868 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// NOTE(review): the Op operand order below is ($src1 elt, $src2, load $src3),
// whereas the 132 Zm_Int and 132 Zm_Intk patterns earlier in this multiclass
// use ($src1 elt, load $src3, $src2). Confirm that this ordering is intended
// for the 132 form.
6870 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6871 (X86selects VK1WM:$mask,
6872 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6873 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6874 (_.EltVT ZeroFP)))))),
6875 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6876 VR128X:$src1, VK1WM:$mask,
6877 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6879 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6880 (X86selects VK1WM:$mask,
6881 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6882 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6883 (_.EltVT ZeroFP)))))),
6884 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6885 VR128X:$src1, VK1WM:$mask,
6886 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6888 // Patterns with rounding mode.
// These forward (i32 timm:$rc) to the Zrb_Int* records as AVX512RC:$rc. Only
// 213 and 231 register forms appear here, in unmasked, masked (Intk) and
// zero-masked (Intkz) flavours.
6889 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6891 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6892 _.FRC:$src3, (i32 timm:$rc)))))),
6893 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6894 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6895 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6897 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6898 (RndOp _.FRC:$src2, _.FRC:$src3,
6899 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6900 (i32 timm:$rc)))))),
6901 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6902 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6903 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6905 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6906 (X86selects VK1WM:$mask,
6908 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6909 _.FRC:$src3, (i32 timm:$rc)),
6910 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6911 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6912 VR128X:$src1, VK1WM:$mask,
6913 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6914 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6916 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6917 (X86selects VK1WM:$mask,
6918 (RndOp _.FRC:$src2, _.FRC:$src3,
6919 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6921 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6922 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6923 VR128X:$src1, VK1WM:$mask,
6924 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6925 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6927 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6928 (X86selects VK1WM:$mask,
6930 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6931 _.FRC:$src3, (i32 timm:$rc)),
6932 (_.EltVT ZeroFP)))))),
6933 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6934 VR128X:$src1, VK1WM:$mask,
6935 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6938 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939 (X86selects VK1WM:$mask,
6940 (RndOp _.FRC:$src2, _.FRC:$src3,
6941 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6943 (_.EltVT ZeroFP)))))),
6944 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6945 VR128X:$src1, VK1WM:$mask,
6946 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
// Instantiate the scalar FMA selection patterns for the four FMA flavours
// (VFMADD, VFMSUB, VFNMADD, VFNMSUB). The string arguments are the instruction
// name prefix and suffix that the patterns paste together to look up the
// "...Zrb_Int" style instruction records.
// Note that only the VFMADD instantiations use an "any_" node (X86any_Fmadd);
// the other three pass X86Fmsub / X86Fnmadd / X86Fnmsub.
//
// Single precision: "SS" suffix, X86Movss as the move node, v4f32x_info as the
// vector type info and fp32imm0 as the floating-point zero.
6951 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
6952 X86Movss, v4f32x_info, fp32imm0>;
6953 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
6954 X86Movss, v4f32x_info, fp32imm0>;
6955 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
6956 X86Movss, v4f32x_info, fp32imm0>;
6957 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
6958 X86Movss, v4f32x_info, fp32imm0>;
// Double precision: "SD" suffix, X86Movsd, v2f64x_info and fp64imm0.
6960 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
6961 X86Movsd, v2f64x_info, fp64imm0>;
6962 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
6963 X86Movsd, v2f64x_info, fp64imm0>;
6964 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
6965 X86Movsd, v2f64x_info, fp64imm0>;
6966 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
6967 X86Movsd, v2f64x_info, fp64imm0>;
6969 //===----------------------------------------------------------------------===//
6970 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6971 //===----------------------------------------------------------------------===//
// Register (r), memory (m) and broadcast-memory (mb) forms of a three-source
// instruction whose accumulator operand $src1 is tied to $dst.
//   opc       - opcode byte used by all three forms.
//   OpcodeStr - assembly mnemonic.
//   OpNode    - SDNode matched by the selection patterns.
//   sched     - scheduling info; the memory forms use sched.Folded and
//               sched.ReadAfterFold.
//   _         - vector type info supplying RC, VT, MemOp, ScalarMemOp, LdFrag,
//               BroadcastLdFrag, BroadcastStr and ExeDomain.
6972 let Constraints = "$src1 = $dst" in {
6973 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6974 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6975 // NOTE: The SDNode has the multiply operands first with the add last.
6976 // This enables commuted load patterns to be autogenerated by tablegen.
6977 let ExeDomain = _.ExeDomain in {
// Register form: both multiply operands come from registers.
6978 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6979 (ins _.RC:$src2, _.RC:$src3),
6980 OpcodeStr, "$src3, $src2", "$src2, $src3",
6981 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6982 AVX512FMA3Base, Sched<[sched]>;
// Memory form: $src3 is a full-width vector load.
6984 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6985 (ins _.RC:$src2, _.MemOp:$src3),
6986 OpcodeStr, "$src3, $src2", "$src2, $src3",
6987 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
6988 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: $src3 is a scalar memory operand printed with the
// broadcast suffix from _.BroadcastStr; encoded with EVEX_B.
6990 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6991 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6992 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6993 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6995 (_.VT (_.BroadcastLdFrag addr:$src3)),
6997 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7000 } // Constraints = "$src1 = $dst"
// Instantiates avx512_pmadd52_rm for the 512-, 256- and 128-bit vector widths.
// The 512-bit form (Z) requires HasIFMA; the 256/128-bit forms (Z256, Z128)
// additionally require HasVLX. Each width picks its own scheduling entry
// (sched.ZMM / sched.YMM / sched.XMM) and type info (_.info512/256/128).
7002 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7003 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7004 let Predicates = [HasIFMA] in {
7005 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7006 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7008 let Predicates = [HasVLX, HasIFMA] in {
7009 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7010 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7011 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7012 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// The two 52-bit multiply-add instructions: "vpmadd52luq" (opcode 0xb4, node
// x86vpmadd52l) and "vpmadd52huq" (opcode 0xb5, node x86vpmadd52h). Both use
// the SchedWriteVecIMul scheduling group and 64-bit integer element vectors.
7016 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7017 SchedWriteVecIMul, avx512vl_i64_info>,
7019 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7020 SchedWriteVecIMul, avx512vl_i64_info>,
7023 //===----------------------------------------------------------------------===//
7024 // AVX-512 Scalar convert from signed/unsigned integer to float/double
7025 //===----------------------------------------------------------------------===//
// Scalar integer-to-floating-point conversion, non-rounding forms.
//   opc       - opcode byte.
//   OpNode    - pattern operator used by the rr_Int / rm_Int patterns.
//   sched     - scheduling info (memory forms use sched.Folded).
//   SrcRC     - register class of the integer source.
//   DstVT     - type info of the destination (RC, FRC, VT, ExeDomain).
//   x86memop  - memory operand type of the integer source.
//   ld_frag   - load fragment matched by the rm_Int pattern.
//   asm       - mnemonic without the leading "v" that the alias prepends.
//   mem       - size suffix ("l"/"q" at the visible call sites).
//   _Uses     - implicit register uses; defaults to [MXCSR].
//   _mayRaiseFPException - defaults to 1.
// Defines four instructions:
//   rr / rm         - isCodeGenOnly forms on DstVT.FRC with no patterns.
//   rr_Int / rm_Int - forms on the full vector register class DstVT.RC that
//                     carry the OpNode selection patterns.
7027 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7028 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7029 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7030 string mem, list<Register> _Uses = [MXCSR],
7031 bit _mayRaiseFPException = 1> {
7032 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7033 mayRaiseFPException = _mayRaiseFPException in {
7034 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7035 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7036 (ins DstVT.FRC:$src1, SrcRC:$src),
7037 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7038 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
// The memory form appends the size suffix as an optional "{mem}" part of the
// mnemonic.
7040 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7041 (ins DstVT.FRC:$src1, x86memop:$src),
7042 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7043 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7044 } // hasSideEffects = 0
7045 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7046 (ins DstVT.RC:$src1, SrcRC:$src2),
7047 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7048 [(set DstVT.RC:$dst,
7049 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7050 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7052 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7053 (ins DstVT.RC:$src1, x86memop:$src2),
7054 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7055 [(set DstVT.RC:$dst,
7056 (OpNode (DstVT.VT DstVT.RC:$src1),
7057 (ld_frag addr:$src2)))]>,
7058 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Alias so the explicitly suffixed mnemonic ("v" # asm # mem) maps to the
// register form rr_Int.
// NOTE(review): the trailing `0, "att"` presumably means "do not use for
// printing" and "AT&T syntax only" - confirm against the InstAlias class.
7060 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7061 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7062 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Scalar integer-to-floating-point conversion with an explicit rounding-control
// operand. Defines rrb_Int, which takes an extra AVX512RC:$rc operand, is
// encoded with EVEX_B and EVEX_RC, and lists MXCSR in Uses. Also defines an
// alias from the size-suffixed mnemonic ("v" # asm # mem) to rrb_Int.
7065 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7066 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7067 X86VectorVTInfo DstVT, string asm,
7069 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7070 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7071 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7073 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7074 [(set DstVT.RC:$dst,
7075 (OpNode (DstVT.VT DstVT.RC:$src1),
7078 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7079 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7080 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7081 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Combines the rounding-control form (avx512_vcvtsi_round, using OpNodeRnd)
// with the plain register/memory forms (avx512_vcvtsi, using OpNode) under a
// single name, and marks the result VEX_LIG.
7084 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7085 X86FoldableSchedWrite sched,
7086 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7087 X86MemOperand x86memop, PatFrag ld_frag,
7088 string asm, string mem> {
7089 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7090 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7091 ld_frag, asm, mem>, VEX_LIG;
// Scalar signed/unsigned integer to float/double conversions, all predicated
// on HasAVX512.
7094 let Predicates = [HasAVX512] in {
// Signed conversions (opcode 0x2A). The 64-bit source variants add VEX_W.
7095 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7097 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7098 XS, EVEX_CD8<32, CD8VT1>;
7099 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7101 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7102 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// The 32-bit integer to double variant uses avx512_vcvtsi directly: no
// rounding-control form, null_frag as the pattern operator, an empty Uses list
// and mayRaiseFPException = 0.
// NOTE(review): presumably because every i32 is exactly representable as f64,
// so rounding mode and FP exceptions are irrelevant - confirm.
7103 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7104 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7105 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7106 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7108 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7109 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Unsuffixed mnemonics with a memory operand map to the 32-bit memory forms.
7111 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7112 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7113 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7114 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// Select any_sint_to_fp of a loaded integer to the codegen-only rm forms. The
// pass-through source operand is irrelevant here, so IMPLICIT_DEF is used.
7116 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7117 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7118 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7119 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7120 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7121 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7122 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7123 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// Same for an integer held in a general-purpose register (rr forms).
7125 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7126 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7127 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7128 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7129 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7130 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7131 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7132 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned conversions (opcode 0x7B); structure mirrors the signed block above.
7134 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7136 v4f32x_info, i32mem, loadi32,
7137 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7138 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7140 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7141 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
// As with VCVTSI2SDZ: no rounding form, null_frag, empty Uses, no FP exception.
7142 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7143 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7144 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7145 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7147 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7148 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7150 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7151 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7152 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7153 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
// any_uint_to_fp of a loaded integer -> rm forms.
7155 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7156 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7157 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7158 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7159 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7160 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7161 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7162 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// any_uint_to_fp of a general-purpose register -> rr forms.
7164 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7165 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7166 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7167 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7168 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7169 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7170 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7171 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7174 //===----------------------------------------------------------------------===//
7175 // AVX-512 Scalar convert from float/double to integer
7176 //===----------------------------------------------------------------------===//
// Scalar float/double to integer conversion operating on the vector register
// class of the source (the "_Int" forms). Defines:
//   rr_Int  - register source, pattern on OpNode.
//   rrb_Int - register source plus AVX512RC:$rc rounding control, pattern on
//             OpNodeRnd; encoded with EVEX_B and EVEX_RC.
//   rm_Int  - memory source matched through SrcVT.ScalarIntMemCPat.
// plus three aliases that accept the mnemonic "v" # asm # aliasStr for each of
// the forms above.
7178 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7179 X86VectorVTInfo DstVT, SDNode OpNode,
7181 X86FoldableSchedWrite sched, string asm,
7183 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7184 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7185 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7186 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7187 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7188 let Uses = [MXCSR] in
7189 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7190 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7191 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7192 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7194 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7195 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7196 [(set DstVT.RC:$dst, (OpNode
7197 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7198 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7199 } // Predicates = [HasAVX512]
// Aliases for the mnemonic with the size suffix appended (aliasStr).
7201 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7202 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7203 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7204 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7205 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7206 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7207 SrcVT.IntScalarMemOp:$src), 0, "att">;
7210 // Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si / X86cvts2siRnd; unsigned
// conversions use opcode 0x79 with X86cvts2usi / X86cvts2usiRnd. Float sources
// take the XS prefix and EVEX_CD8<32, ...>; double sources take XD and
// EVEX_CD8<64, ...>. The 64-bit destination variants add VEX_W and use "{q}"
// as the alias suffix instead of "{l}".
7211 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7212 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7213 XS, EVEX_CD8<32, CD8VT1>;
7214 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7215 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7216 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7217 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7218 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7219 XS, EVEX_CD8<32, CD8VT1>;
7220 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7221 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7222 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7223 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7224 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7225 XD, EVEX_CD8<64, CD8VT1>;
7226 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7227 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7228 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7229 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7230 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7231 XD, EVEX_CD8<64, CD8VT1>;
7232 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7233 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7234 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7236 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7237 // which produce unnecessary vmovs{s,d} instructions
// Each pattern matches "insert the converted scalar into element 0 of $dst"
// (X86Movss / X86Movsd of a scalar_to_vector of the conversion) and selects the
// corresponding _Int instruction, which takes $dst as its vector source
// operand, so no separate move is needed.
7238 let Predicates = [HasAVX512] in {
// Signed integer -> f32 (64-bit then 32-bit source; register then load).
7239 def : Pat<(v4f32 (X86Movss
7240 (v4f32 VR128X:$dst),
7241 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7242 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7244 def : Pat<(v4f32 (X86Movss
7245 (v4f32 VR128X:$dst),
7246 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7247 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7249 def : Pat<(v4f32 (X86Movss
7250 (v4f32 VR128X:$dst),
7251 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7252 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7254 def : Pat<(v4f32 (X86Movss
7255 (v4f32 VR128X:$dst),
7256 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7257 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Signed integer -> f64.
7259 def : Pat<(v2f64 (X86Movsd
7260 (v2f64 VR128X:$dst),
7261 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7262 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7264 def : Pat<(v2f64 (X86Movsd
7265 (v2f64 VR128X:$dst),
7266 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7267 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7269 def : Pat<(v2f64 (X86Movsd
7270 (v2f64 VR128X:$dst),
7271 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7272 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7274 def : Pat<(v2f64 (X86Movsd
7275 (v2f64 VR128X:$dst),
7276 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7277 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned integer -> f32.
7279 def : Pat<(v4f32 (X86Movss
7280 (v4f32 VR128X:$dst),
7281 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7282 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7284 def : Pat<(v4f32 (X86Movss
7285 (v4f32 VR128X:$dst),
7286 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7287 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7289 def : Pat<(v4f32 (X86Movss
7290 (v4f32 VR128X:$dst),
7291 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7292 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7294 def : Pat<(v4f32 (X86Movss
7295 (v4f32 VR128X:$dst),
7296 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7297 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned integer -> f64.
7299 def : Pat<(v2f64 (X86Movsd
7300 (v2f64 VR128X:$dst),
7301 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7302 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7304 def : Pat<(v2f64 (X86Movsd
7305 (v2f64 VR128X:$dst),
7306 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7307 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7309 def : Pat<(v2f64 (X86Movsd
7310 (v2f64 VR128X:$dst),
7311 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7312 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7314 def : Pat<(v2f64 (X86Movsd
7315 (v2f64 VR128X:$dst),
7316 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7317 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7318 } // Predicates = [HasAVX512]
7320 // Convert float/double to signed/unsigned int 32/64 with truncation
// Defines, for one source/destination type pair:
//   rr / rm  - isCodeGenOnly forms on the scalar FP register class
//              (_SrcRC.FRC) or a scalar load, selected via OpNode.
//   rr_Int   - vector-register source, selected via OpNodeInt.
//   rrb_Int  - vector-register source with "{sae}" in the assembly string,
//              selected via OpNodeSAE; encoded with EVEX_B.
//   rm_Int   - memory source matched through _SrcRC.ScalarIntMemCPat.
// The three trailing aliases accept asm # aliasStr for the _Int forms. Unlike
// avx512_cvt_s_int_round, no "v" is prepended here: the visible call sites
// already pass a "v"-prefixed mnemonic as asm.
7321 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7322 X86VectorVTInfo _DstRC, SDNode OpNode,
7323 SDNode OpNodeInt, SDNode OpNodeSAE,
7324 X86FoldableSchedWrite sched, string aliasStr>{
7325 let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7326 let isCodeGenOnly = 1 in {
7327 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7328 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7329 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7330 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7331 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7332 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7333 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7334 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7337 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7338 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7339 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7340 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7341 let Uses = [MXCSR] in
7342 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7343 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7344 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7345 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7346 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7347 (ins _SrcRC.IntScalarMemOp:$src),
7348 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7349 [(set _DstRC.RC:$dst,
7350 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7351 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Aliases for the size-suffixed mnemonic.
7354 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7355 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7356 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7357 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7358 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7359 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7360 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating float/double to signed integer conversions: opcode 0x2C,
// any_fp_to_sint for the scalar forms and X86cvtts2Int / X86cvtts2IntSAE for
// the vector-register forms. 64-bit destinations add VEX_W and use "{q}".
7363 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7364 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7365 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7366 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7367 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7368 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7369 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7370 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7371 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7372 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7373 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7374 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
// Truncating float/double to unsigned integer conversions: opcode 0x78,
// any_fp_to_uint and X86cvtts2UInt / X86cvtts2UIntSAE.
7376 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7377 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7378 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7379 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7380 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7381 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7382 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7383 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7384 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7385 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7386 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7387 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7389 //===----------------------------------------------------------------------===//
7390 // AVX-512 Convert from float to double and back
7391 //===----------------------------------------------------------------------===//
// Scalar float<->double conversion without explicit rounding/SAE control.
// All records defined here use MXCSR and may raise FP exceptions.
//   _    - type info of the destination; _Src - type info of the source.
// Defines:
//   rr_Int / rm_Int - maskable forms on the vector register classes, with
//                     patterns on OpNode.
//   rr / rm         - isCodeGenOnly forms on the scalar FP register classes
//                     with no patterns.
7393 let Uses = [MXCSR], mayRaiseFPException = 1 in
7394 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7395 X86VectorVTInfo _Src, SDNode OpNode,
7396 X86FoldableSchedWrite sched> {
7397 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7398 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7399 "$src2, $src1", "$src1, $src2",
7400 (_.VT (OpNode (_.VT _.RC:$src1),
7401 (_Src.VT _Src.RC:$src2)))>,
7402 EVEX_4V, VEX_LIG, Sched<[sched]>;
7403 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7404 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7405 "$src2, $src1", "$src1, $src2",
7406 (_.VT (OpNode (_.VT _.RC:$src1),
7407 (_Src.VT _Src.ScalarIntMemCPat:$src2)))>,
7409 Sched<[sched.Folded, sched.ReadAfterFold]>;
7411 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7412 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7413 (ins _.FRC:$src1, _Src.FRC:$src2),
7414 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7415 EVEX_4V, VEX_LIG, Sched<[sched]>;
7417 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7418 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7419 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7420 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7424 // Scalar Conversion with SAE - suppress all exceptions
// Defines rrb_Int: a maskable register-register form whose assembly string
// carries "{sae}", selected via OpNodeSAE and encoded with EVEX_B.
7425 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7426 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7427 X86FoldableSchedWrite sched> {
7428 let Uses = [MXCSR] in
7429 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7430 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7431 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7432 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7433 (_Src.VT _Src.RC:$src2)))>,
7434 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7437 // Scalar Conversion with rounding control (RC)
// Defines rrb_Int: a maskable register-register form with an additional
// AVX512RC:$rc operand; the pattern passes it to OpNodeRnd as an i32 target
// immediate.
7438 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7439 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7440 X86FoldableSchedWrite sched> {
7441 let Uses = [MXCSR] in
7442 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7443 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7444 "$rc, $src2, $src1", "$src1, $src2, $rc",
7445 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7446 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7447 EVEX_4V, VEX_LIG, Sched<[sched]>,
// Double -> float scalar conversion: combines the plain forms
// (avx512_cvt_fp_scalar) with the rounding-control form
// (avx512_cvt_fp_rc_scalar) under the "Z" name, predicated on HasAVX512 and
// encoded with VEX_W, EVEX_CD8<64, CD8VT1> and the XD prefix.
7450 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7451 SDNode OpNode, SDNode OpNodeRnd,
7452 X86FoldableSchedWrite sched,
7453 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7454 let Predicates = [HasAVX512] in {
7455 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7456 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7457 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// Float -> double scalar conversion: combines the plain forms
// (avx512_cvt_fp_scalar) with the SAE form (avx512_cvt_fp_sae_scalar) under the
// "Z" name, predicated on HasAVX512 and encoded with EVEX_CD8<32, CD8VT1> and
// the XS prefix. Unlike the sd2ss variant there is no rounding-control form.
7461 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7462 SDNode OpNode, SDNode OpNodeSAE,
7463 X86FoldableSchedWrite sched,
7464 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7465 let Predicates = [HasAVX512] in {
7466 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7467 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7468 EVEX_CD8<32, CD8VT1>, XS;
// Both scalar float<->double conversions share opcode 0x5A.
// vcvtsd2ss: X86frounds with the rounding variant X86froundsRnd, source type
// f64x_info.
7471 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7472 X86froundsRnd, WriteCvtSD2SS, f64x_info,
// vcvtss2sd: X86fpexts with the SAE variant X86fpextsSAE, source type
// f32x_info.
7474 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7475 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
// Scalar f32 -> f64 extension selects the codegen-only VCVTSS2SDZrr with an
// IMPLICIT_DEF pass-through operand.
7478 def : Pat<(f64 (any_fpextend FR32X:$src)),
7479 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7480 Requires<[HasAVX512]>;
// The load-folding form is only selected when OptForSize also holds.
7481 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7482 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7483 Requires<[HasAVX512, OptForSize]>;
// Scalar f64 -> f32 rounding.
7485 def : Pat<(f32 (any_fpround FR64X:$src)),
7486 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7487 Requires<[HasAVX512]>;
// Convert element 0 of $src and insert the result into element 0 of $dst:
// maps directly onto the _Int forms, which take $dst as their first source.
7489 def : Pat<(v4f32 (X86Movss
7490 (v4f32 VR128X:$dst),
7491 (v4f32 (scalar_to_vector
7492 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7493 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7494 Requires<[HasAVX512]>;
7496 def : Pat<(v2f64 (X86Movsd
7497 (v2f64 VR128X:$dst),
7498 (v2f64 (scalar_to_vector
7499 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7500 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7501 Requires<[HasAVX512]>;
7503 //===----------------------------------------------------------------------===//
7504 // AVX-512 Vector convert from signed/unsigned integer to float/double
7505 // and from float/double to signed/unsigned integer
7506 //===----------------------------------------------------------------------===//
// Generic packed conversion with register (rr), memory (rm) and broadcast
// memory (rmb) forms, each built with AVX512_maskable_common.
//   _         - destination vector type info; _Src - source vector type info.
//   OpNode    - SDNode used in the selection patterns.
//   Broadcast - broadcast suffix appended to the rmb operand string; defaults
//               to _.BroadcastStr.
//   Alias     - string appended to the mnemonic of the rm form only
//               (OpcodeStr#Alias); defaults to "".
//   MemOp     - memory operand type of the rm form; defaults to _Src.MemOp.
//   MaskRC    - write-mask register class; defaults to _.KRCWM.
//   LdDAG     - pattern used for the rm form; defaults to OpNode applied to a
//               full-width load of the source type.
// All forms use MXCSR and may raise FP exceptions. The merge-masked variants
// tie $src0 to $dst and are expressed with vselect.
7508 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7509 X86VectorVTInfo _Src, SDNode OpNode,
7510 X86FoldableSchedWrite sched,
7511 string Broadcast = _.BroadcastStr,
7512 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7513 RegisterClass MaskRC = _.KRCWM,
7514 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7515 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7516 defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
7518 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7519 (ins MaskRC:$mask, _Src.RC:$src),
7520 OpcodeStr, "$src", "$src",
7521 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7522 (vselect MaskRC:$mask,
7523 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7525 vselect, "$src0 = $dst">,
7526 EVEX, Sched<[sched]>;
7528 defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7530 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7531 (ins MaskRC:$mask, MemOp:$src),
7532 OpcodeStr#Alias, "$src", "$src",
7534 (vselect MaskRC:$mask, LdDAG, _.RC:$src0),
7535 vselect, "$src0 = $dst">,
7536 EVEX, Sched<[sched.Folded]>;
7538 defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
7539 (ins _Src.ScalarMemOp:$src),
7540 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7541 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7543 "${src}"##Broadcast, "${src}"##Broadcast,
7544 (_.VT (OpNode (_Src.VT
7545 (_Src.BroadcastLdFrag addr:$src))
7547 (vselect MaskRC:$mask,
7551 (_Src.BroadcastLdFrag addr:$src)))),
7553 vselect, "$src0 = $dst">,
7554 EVEX, EVEX_B, Sched<[sched.Folded]>;
7557 // Conversion with SAE - suppress all exceptions
// Defines rrb: a maskable register form whose assembly string carries "{sae}",
// selected via OpNodeSAE and encoded with EVEX_B.
7558 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7559 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7560 X86FoldableSchedWrite sched> {
7561 let Uses = [MXCSR] in
7562 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7563 (ins _Src.RC:$src), OpcodeStr,
7564 "{sae}, $src", "$src, {sae}",
7565 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7566 EVEX, EVEX_B, Sched<[sched]>;
7569 // Conversion with rounding control (RC)
// Defines rrb: a maskable register form with an additional AVX512RC:$rc
// operand that the pattern passes to OpNodeRnd as an i32 target immediate;
// encoded with EVEX_B and EVEX_RC.
7570 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7571 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7572 X86FoldableSchedWrite sched> {
7573 let Uses = [MXCSR] in
7574 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7575 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7576 "$rc, $src", "$src, $rc",
7577 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7578 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7581 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Forwards every parameter to avx512_vcvt_fp and overrides LdDAG with the
// PatFrag named "extload" # _Src.VTName applied to addr:$src, so the memory
// pattern is a single extending load rather than OpNode of a plain load.
7582 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7583 X86VectorVTInfo _Src, SDNode OpNode,
7584 X86FoldableSchedWrite sched,
7585 string Broadcast = _.BroadcastStr,
7586 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7587 RegisterClass MaskRC = _.KRCWM>
7588 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
7590 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7592 // Extend Float to Double
// Z (512-bit) requires HasAVX512 and also gets the SAE form; Z128 and Z256
// require HasVLX. The 128-bit form uses X86any_vfpext instead of
// any_fpextend, an explicit "{1to2}" broadcast string and an f64mem memory
// operand.
// NOTE(review): presumably because only the low two floats of the v4f32
// source are converted, so a 64-bit load suffices - confirm.
7593 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7594 X86SchedWriteWidths sched> {
7595 let Predicates = [HasAVX512] in {
7596 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7597 any_fpextend, sched.ZMM>,
7598 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7599 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7601 let Predicates = [HasVLX] in {
7602 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7603 X86any_vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
7604 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
7605 sched.YMM>, EVEX_V256;
7609 // Truncate Double to Float
// Z (512-bit) requires HasAVX512 and also gets the rounding-control form;
// Z128 and Z256 require HasVLX. The 128-bit form passes null_frag as its
// node, so it defines no selection patterns itself, and uses VK2WM as its
// mask class. The 128- and 256-bit memory forms get "{x}" / "{y}" appended
// to the mnemonic (the Alias parameter of avx512_vcvt_fp).
7610 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7611 let Predicates = [HasAVX512] in {
7612 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, X86any_vfpround, sched.ZMM>,
7613 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7614 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7616 let Predicates = [HasVLX] in {
7617 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7618 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
7620 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, X86any_vfpround,
7621 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// Aliases accepting the explicit "x" suffix for the 128-bit register and
// broadcast forms (unmasked, merge-masked and zero-masked).
7624 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7625 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7626 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7627 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7628 VK2WM:$mask, VR128X:$src), 0, "att">;
7629 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|"
7630 "$dst {${mask}} {z}, $src}",
7631 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7632 VK2WM:$mask, VR128X:$src), 0, "att">;
7633 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7634 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7635 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7636 "$dst {${mask}}, ${src}{1to2}}",
7637 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7638 VK2WM:$mask, f64mem:$src), 0, "att">;
7639 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7640 "$dst {${mask}} {z}, ${src}{1to2}}",
7641 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7642 VK2WM:$mask, f64mem:$src), 0, "att">;
// Aliases accepting the explicit "y" suffix for the 256-bit register and
// broadcast forms; the result is still a 128-bit register (VR128X).
7644 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7645 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7646 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7647 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7648 VK4WM:$mask, VR256X:$src), 0, "att">;
7649 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
7650 "$dst {${mask}} {z}, $src}",
7651 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7652 VK4WM:$mask, VR256X:$src), 0, "att">;
7653 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7654 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7655 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7656 "$dst {${mask}}, ${src}{1to4}}",
7657 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7658 VK4WM:$mask, f64mem:$src), 0, "att">;
7659 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7660 "$dst {${mask}} {z}, ${src}{1to4}}",
7661 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7662 VK4WM:$mask, f64mem:$src), 0, "att">;
// Packed double<->single precision conversion instantiations. Both share
// opcode 0x5A and differ in prefix (PD vs PS), VEX_W and the EVEX_CD8 tuple.
7665 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7666 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7667 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7668 PS, EVEX_CD8<32, CD8VH>;
7670 let Predicates = [HasVLX] in {
7671 // Special patterns to allow use of X86vmfpround for masking. Instruction
7672 // patterns have been disabled with null_frag.
// Three source kinds are covered below: register (rr*), full vector load via
// loadv2f64 (rm*) and broadcast load via X86VBroadcastld64 (rmb*). Each has an
// unmasked form, a masked form with a $src0 pass-through operand (*k) and a
// masked form matching ImmAllZerosV (*kz).
7673 def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7674 (VCVTPD2PSZ128rr VR128X:$src)>;
7675 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7677 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7678 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7680 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7682 def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7683 (VCVTPD2PSZ128rm addr:$src)>;
7684 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7686 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7687 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7689 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7691 def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7692 (VCVTPD2PSZ128rmb addr:$src)>;
7693 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7694 (v4f32 VR128X:$src0), VK2WM:$mask),
7695 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7696 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7697 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7698 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7701 // Convert Signed/Unsigned Doubleword to Double
// Template arguments: opc/OpcodeStr are the opcode byte and mnemonic, OpNode is
// used by the 512- and 256-bit forms, OpNode128 by the 128-bit form, and sched
// supplies the per-width scheduling classes.
// The enclosing 'let' gives every record defined here an empty Uses list and
// mayRaiseFPException = 0.
7702 let Uses = []<Register>, mayRaiseFPException = 0 in
7703 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7704 SDNode OpNode128, X86SchedWriteWidths sched> {
7705 // No rounding in this op
7706 let Predicates = [HasAVX512] in
7707 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7708 sched.ZMM>, EVEX_V512;
7710 let Predicates = [HasVLX] in {
// The 128-bit form overrides the defaults: explicit "{1to2}" broadcast string,
// an i64mem memory operand, and a load pattern built from a loadi64 wrapped in
// scalar_to_vector and bc_v4i32.
7711 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7712 OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
7713 (v2f64 (OpNode128 (bc_v4i32
7715 (scalar_to_vector (loadi64 addr:$src))))))>,
7717 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7718 sched.YMM>, EVEX_V256;
7722 // Convert Signed/Unsigned Doubleword to Float
// OpNode is used for all three vector widths; OpNodeRnd is forwarded only to
// the 512-bit avx512_vcvt_fp_rc variant.
7723 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7724 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v16i32 -> v16f32), requires HasAVX512.
7725 let Predicates = [HasAVX512] in
7726 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7728 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7729 OpNodeRnd, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require HasVLX.
7731 let Predicates = [HasVLX] in {
7732 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7733 sched.XMM>, EVEX_V128;
7734 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7735 sched.YMM>, EVEX_V256;
7739 // Convert Float to Signed/Unsigned Doubleword with truncation
// OpNode is used for all three vector widths; OpNodeSAE is forwarded only to
// the 512-bit avx512_vcvt_fp_sae variant.
7740 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7741 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
// 512-bit form (v16f32 -> v16i32), requires HasAVX512.
7742 let Predicates = [HasAVX512] in {
7743 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7745 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7746 OpNodeSAE, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require HasVLX.
7748 let Predicates = [HasVLX] in {
7749 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7750 sched.XMM>, EVEX_V128;
7751 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7752 sched.YMM>, EVEX_V256;
7756 // Convert Float to Signed/Unsigned Doubleword
// OpNode is used for all three vector widths; OpNodeRnd is forwarded only to
// the 512-bit avx512_vcvt_fp_rc variant.
7757 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7758 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v16f32 -> v16i32), requires HasAVX512.
7759 let Predicates = [HasAVX512] in {
7760 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7762 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7763 OpNodeRnd, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require HasVLX.
7765 let Predicates = [HasVLX] in {
7766 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7767 sched.XMM>, EVEX_V128;
7768 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7769 sched.YMM>, EVEX_V256;
7773 // Convert Double to Signed/Unsigned Doubleword with truncation
// OpNode is used by the 512- and 256-bit forms; the 128-bit form is defined
// with null_frag (no instruction pattern). OpNodeSAE is forwarded only to the
// 512-bit avx512_vcvt_fp_sae variant.
7774 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7775 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7776 let Predicates = [HasAVX512] in {
7777 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7779 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7780 OpNodeSAE, sched.ZMM>, EVEX_V512;
7782 let Predicates = [HasVLX] in {
7783 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
7784 // memory forms of these instructions in Asm Parser. They have the same
7785 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7786 // due to the same reason.
7787 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7788 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7790 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7791 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// "att"-variant aliases spelling the mnemonic with an explicit 'x' suffix for
// the Z128 register (rr/rrk/rrkz) and broadcast-memory (rmb/rmbk/rmbkz) forms.
7794 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7795 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7796 VR128X:$src), 0, "att">;
7797 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7798 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7799 VK2WM:$mask, VR128X:$src), 0, "att">;
7800 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7801 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7802 VK2WM:$mask, VR128X:$src), 0, "att">;
7803 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7804 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7805 f64mem:$src), 0, "att">;
7806 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7807 "$dst {${mask}}, ${src}{1to2}}",
7808 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7809 VK2WM:$mask, f64mem:$src), 0, "att">;
7810 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7811 "$dst {${mask}} {z}, ${src}{1to2}}",
7812 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7813 VK2WM:$mask, f64mem:$src), 0, "att">;
// Same set of aliases with a 'y' suffix for the Z256 forms (VR256X source,
// VK4WM mask, {1to4} broadcast).
7815 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7816 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7817 VR256X:$src), 0, "att">;
7818 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7819 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7820 VK4WM:$mask, VR256X:$src), 0, "att">;
7821 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7822 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7823 VK4WM:$mask, VR256X:$src), 0, "att">;
7824 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7825 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7826 f64mem:$src), 0, "att">;
7827 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7828 "$dst {${mask}}, ${src}{1to4}}",
7829 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7830 VK4WM:$mask, f64mem:$src), 0, "att">;
7831 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7832 "$dst {${mask}} {z}, ${src}{1to4}}",
7833 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7834 VK4WM:$mask, f64mem:$src), 0, "att">;
7837 // Convert Double to Signed/Unsigned Doubleword
// OpNode is used by the 512- and 256-bit forms; the 128-bit form is defined
// with null_frag (no instruction pattern). OpNodeRnd is forwarded only to the
// 512-bit avx512_vcvt_fp_rc variant.
7838 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7839 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7840 let Predicates = [HasAVX512] in {
7841 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7843 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7844 OpNodeRnd, sched.ZMM>, EVEX_V512;
7846 let Predicates = [HasVLX] in {
7847 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
7848 // memory forms of these instructions in Asm Parser. They have the same
7849 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7850 // due to the same reason.
7851 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7852 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7854 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7855 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
// "att"-variant aliases spelling the mnemonic with an explicit 'x' suffix for
// the Z128 register (rr/rrk/rrkz) and broadcast-memory (rmb/rmbk/rmbkz) forms.
7858 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7859 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7860 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7861 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7862 VK2WM:$mask, VR128X:$src), 0, "att">;
7863 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7864 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7865 VK2WM:$mask, VR128X:$src), 0, "att">;
7866 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7867 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7868 f64mem:$src), 0, "att">;
7869 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
7870 "$dst {${mask}}, ${src}{1to2}}",
7871 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7872 VK2WM:$mask, f64mem:$src), 0, "att">;
7873 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7874 "$dst {${mask}} {z}, ${src}{1to2}}",
7875 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7876 VK2WM:$mask, f64mem:$src), 0, "att">;
// Same set of aliases with a 'y' suffix for the Z256 forms (VR256X source,
// VK4WM mask, {1to4} broadcast).
7878 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7879 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7880 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7881 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7882 VK4WM:$mask, VR256X:$src), 0, "att">;
7883 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7884 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7885 VK4WM:$mask, VR256X:$src), 0, "att">;
7886 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7887 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7888 f64mem:$src), 0, "att">;
7889 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
7890 "$dst {${mask}}, ${src}{1to4}}",
7891 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7892 VK4WM:$mask, f64mem:$src), 0, "att">;
7893 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7894 "$dst {${mask}} {z}, ${src}{1to4}}",
7895 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7896 VK4WM:$mask, f64mem:$src), 0, "att">;
7899 // Convert Double to Signed/Unsigned Quadword
// OpNode is used for all three vector widths; OpNodeRnd is forwarded only to
// the 512-bit avx512_vcvt_fp_rc variant.
7900 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7901 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v8f64 -> v8i64), requires HasDQI.
7902 let Predicates = [HasDQI] in {
7903 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7905 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7906 OpNodeRnd, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require both HasDQI and HasVLX.
7908 let Predicates = [HasDQI, HasVLX] in {
7909 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7910 sched.XMM>, EVEX_V128;
7911 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7912 sched.YMM>, EVEX_V256;
7916 // Convert Double to Signed/Unsigned Quadword with truncation
// OpNode is used for all three vector widths.
// NOTE: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here,
// i.e. it is the SAE node (avx512_cvttps2dq names the equivalent parameter
// OpNodeSAE).
7917 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7918 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v8f64 -> v8i64), requires HasDQI.
7919 let Predicates = [HasDQI] in {
7920 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7922 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7923 OpNodeRnd, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require both HasDQI and HasVLX.
7925 let Predicates = [HasDQI, HasVLX] in {
7926 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7927 sched.XMM>, EVEX_V128;
7928 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7929 sched.YMM>, EVEX_V256;
7933 // Convert Signed/Unsigned Quadword to Double
// OpNode is used for all three vector widths; OpNodeRnd is forwarded only to
// the 512-bit avx512_vcvt_fp_rc variant.
7934 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7935 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v8i64 -> v8f64), requires HasDQI.
7936 let Predicates = [HasDQI] in {
7937 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7939 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7940 OpNodeRnd, sched.ZMM>, EVEX_V512;
// 128-bit and 256-bit forms, require both HasDQI and HasVLX; both are tagged
// NotEVEX2VEXConvertible.
7942 let Predicates = [HasDQI, HasVLX] in {
7943 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7944 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
7945 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7946 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
7950 // Convert Float to Signed/Unsigned Quadword
// OpNode is used for all three vector widths; OpNodeRnd is forwarded only to
// the 512-bit avx512_vcvt_fp_rc variant.
7951 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7952 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v8f32 -> v8i64), requires HasDQI.
7953 let Predicates = [HasDQI] in {
7954 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7956 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7957 OpNodeRnd, sched.ZMM>, EVEX_V512;
7959 let Predicates = [HasDQI, HasVLX] in {
7960 // Explicitly specified broadcast string, since we take only 2 elements
7961 // from v4f32x_info source
// The memory operand is f64mem and the load pattern is built from a loadf64
// wrapped in scalar_to_vector and bc_v4f32.
7962 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7963 sched.XMM, "{1to2}", "", f64mem, VK2WM,
7964 (v2i64 (OpNode (bc_v4f32
7966 (scalar_to_vector (loadf64 addr:$src))))))>,
7968 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7969 sched.YMM>, EVEX_V256;
7973 // Convert Float to Signed/Unsigned Quadword with truncation
// OpNode is used for all three vector widths.
// NOTE: despite its name, OpNodeRnd is forwarded to avx512_vcvt_fp_sae here,
// i.e. it is the SAE node (avx512_cvttps2dq names the equivalent parameter
// OpNodeSAE).
7974 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7975 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
// 512-bit form (v8f32 -> v8i64), requires HasDQI.
7976 let Predicates = [HasDQI] in {
7977 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
7978 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7979 OpNodeRnd, sched.ZMM>, EVEX_V512;
7981 let Predicates = [HasDQI, HasVLX] in {
7982 // Explicitly specified broadcast string, since we take only 2 elements
7983 // from v4f32x_info source
// The memory operand is f64mem and the load pattern is built from a loadf64
// wrapped in scalar_to_vector and bc_v4f32.
7984 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7985 sched.XMM, "{1to2}", "", f64mem, VK2WM,
7986 (v2i64 (OpNode (bc_v4f32
7988 (scalar_to_vector (loadf64 addr:$src))))))>,
7990 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7991 sched.YMM>, EVEX_V256;
7995 // Convert Signed/Unsigned Quadword to Float
// OpNode is used by the 512- and 256-bit forms; the 128-bit form is defined
// with null_frag (no instruction pattern). OpNodeRnd is forwarded only to the
// 512-bit avx512_vcvt_fp_rc variant.
7996 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
7998 let Predicates = [HasDQI] in {
7999 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8001 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8002 OpNodeRnd, sched.ZMM>, EVEX_V512;
8004 let Predicates = [HasDQI, HasVLX] in {
8005 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
8006 // memory forms of these instructions in Asm Parser. They have the same
8007 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8008 // due to the same reason.
8009 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8010 sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8011 EVEX_V128, NotEVEX2VEXConvertible;
8012 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8013 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8014 NotEVEX2VEXConvertible;
// "att"-variant aliases spelling the mnemonic with an explicit 'x' suffix for
// the Z128 register (rr/rrk/rrkz) and broadcast-memory (rmb/rmbk/rmbkz) forms.
// The broadcast memory operand is i64mem here.
8017 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8018 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8019 VR128X:$src), 0, "att">;
8020 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8021 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8022 VK2WM:$mask, VR128X:$src), 0, "att">;
8023 def : InstAlias<OpcodeStr##"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8024 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8025 VK2WM:$mask, VR128X:$src), 0, "att">;
8026 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8027 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8028 i64mem:$src), 0, "att">;
8029 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}}|"
8030 "$dst {${mask}}, ${src}{1to2}}",
8031 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8032 VK2WM:$mask, i64mem:$src), 0, "att">;
8033 def : InstAlias<OpcodeStr##"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8034 "$dst {${mask}} {z}, ${src}{1to2}}",
8035 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8036 VK2WM:$mask, i64mem:$src), 0, "att">;
// Same set of aliases with a 'y' suffix for the Z256 forms (VR256X source,
// VK4WM mask, {1to4} broadcast).
8038 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8039 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8040 VR256X:$src), 0, "att">;
8041 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}}|"
8042 "$dst {${mask}}, $src}",
8043 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8044 VK4WM:$mask, VR256X:$src), 0, "att">;
8045 def : InstAlias<OpcodeStr##"y\t{$src, $dst {${mask}} {z}|"
8046 "$dst {${mask}} {z}, $src}",
8047 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8048 VK4WM:$mask, VR256X:$src), 0, "att">;
8049 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8050 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8051 i64mem:$src), 0, "att">;
8052 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}}|"
8053 "$dst {${mask}}, ${src}{1to4}}",
8054 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8055 VK4WM:$mask, i64mem:$src), 0, "att">;
8056 def : InstAlias<OpcodeStr##"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8057 "$dst {${mask}} {z}, ${src}{1to4}}",
8058 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8059 VK4WM:$mask, i64mem:$src), 0, "att">;
// Instantiations of the packed integer<->floating-point conversion
// multiclasses. Each entry supplies the opcode byte, mnemonic, the SDNodes for
// the plain and rounding/SAE forms, a scheduling-class group, and the
// prefix / VEX_W / EVEX_CD8 encoding attributes.

// Doubleword <-> FP conversions.
8062 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP,
8063 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8065 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp,
8066 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8067 PS, EVEX_CD8<32, CD8VF>;
8069 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8070 X86cvttp2siSAE, SchedWriteCvtPS2DQ>,
8071 XS, EVEX_CD8<32, CD8VF>;
8073 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8074 X86cvttp2siSAE, SchedWriteCvtPD2DQ>,
8075 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8077 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8078 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PS,
8079 EVEX_CD8<32, CD8VF>;
8081 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8082 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>,
8083 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8085 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8086 X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS,
8087 EVEX_CD8<32, CD8VH>;
8089 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8090 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8091 EVEX_CD8<32, CD8VF>;
8093 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8094 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8095 EVEX_CD8<32, CD8VF>;
8097 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8098 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8099 VEX_W, EVEX_CD8<64, CD8VF>;
8101 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8102 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8103 PS, EVEX_CD8<32, CD8VF>;
8105 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8106 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8107 PS, EVEX_CD8<64, CD8VF>;
// FP -> quadword conversions (multiclasses gated on HasDQI).
8109 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8110 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8111 PD, EVEX_CD8<64, CD8VF>;
8113 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8114 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8115 EVEX_CD8<32, CD8VH>;
8117 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8118 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8119 PD, EVEX_CD8<64, CD8VF>;
8121 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8122 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8123 EVEX_CD8<32, CD8VH>;
8125 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8126 X86cvttp2siSAE, SchedWriteCvtPD2DQ>, VEX_W,
8127 PD, EVEX_CD8<64, CD8VF>;
8129 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8130 X86cvttp2siSAE, SchedWriteCvtPS2DQ>, PD,
8131 EVEX_CD8<32, CD8VH>;
8133 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8134 X86cvttp2uiSAE, SchedWriteCvtPD2DQ>, VEX_W,
8135 PD, EVEX_CD8<64, CD8VF>;
8137 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8138 X86cvttp2uiSAE, SchedWriteCvtPS2DQ>, PD,
8139 EVEX_CD8<32, CD8VH>;
// Quadword -> FP conversions (multiclasses gated on HasDQI).
8141 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8142 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8143 EVEX_CD8<64, CD8VF>;
8145 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8146 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8147 EVEX_CD8<64, CD8VF>;
8149 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8150 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8151 EVEX_CD8<64, CD8VF>;
8153 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8154 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8155 EVEX_CD8<64, CD8VF>;
// Selection patterns for the 128-bit PD -> (U)DQ conversions. Each of the four
// groups below covers register, loadv2f64 and X86VBroadcastld64 sources, each
// in unmasked, $src0 pass-through (*k) and ImmAllZerosV (*kz) variants.
8157 let Predicates = [HasVLX] in {
8158 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8159 // patterns have been disabled with null_frag.
8160 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8161 (VCVTPD2DQZ128rr VR128X:$src)>;
8162 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8164 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8165 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8167 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8169 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8170 (VCVTPD2DQZ128rm addr:$src)>;
8171 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8173 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8174 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8176 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8178 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8179 (VCVTPD2DQZ128rmb addr:$src)>;
8180 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8181 (v4i32 VR128X:$src0), VK2WM:$mask),
8182 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8183 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8184 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8185 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8187 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8188 // patterns have been disabled with null_frag.
8189 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8190 (VCVTTPD2DQZ128rr VR128X:$src)>;
8191 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8193 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8194 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8196 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8198 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8199 (VCVTTPD2DQZ128rm addr:$src)>;
8200 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8202 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8203 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8205 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8207 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8208 (VCVTTPD2DQZ128rmb addr:$src)>;
8209 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8210 (v4i32 VR128X:$src0), VK2WM:$mask),
8211 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8212 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8213 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8214 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8216 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8217 // patterns have been disabled with null_frag.
8218 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8219 (VCVTPD2UDQZ128rr VR128X:$src)>;
8220 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8222 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8223 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8225 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8227 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8228 (VCVTPD2UDQZ128rm addr:$src)>;
8229 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8231 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8232 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8234 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8236 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8237 (VCVTPD2UDQZ128rmb addr:$src)>;
8238 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8239 (v4i32 VR128X:$src0), VK2WM:$mask),
8240 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8241 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8242 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8243 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8245 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8246 // patterns have been disabled with null_frag.
8247 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8248 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8249 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8251 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8252 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8254 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8256 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8257 (VCVTTPD2UDQZ128rm addr:$src)>;
8258 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8260 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8261 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8263 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8265 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8266 (VCVTTPD2UDQZ128rmb addr:$src)>;
8267 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8268 (v4i32 VR128X:$src0), VK2WM:$mask),
8269 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8270 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8271 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8272 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Fold an X86vzload64 (typed v2f64, bitcast to v4f32 with bc_v4f32) into the
// memory forms of the 128-bit PS -> (U)QQ conversions. Each conversion has an
// unmasked pattern plus two vselect-based masked patterns selecting the rmk
// ($src0 pass-through) and rmkz (ImmAllZerosV) instructions.
8275 let Predicates = [HasDQI, HasVLX] in {
8276 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8277 (VCVTPS2QQZ128rm addr:$src)>;
8278 def : Pat<(v2i64 (vselect VK2WM:$mask,
8279 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8281 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8282 def : Pat<(v2i64 (vselect VK2WM:$mask,
8283 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8284 v2i64x_info.ImmAllZerosV)),
8285 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8287 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8288 (VCVTPS2UQQZ128rm addr:$src)>;
8289 def : Pat<(v2i64 (vselect VK2WM:$mask,
8290 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8292 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8293 def : Pat<(v2i64 (vselect VK2WM:$mask,
8294 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8295 v2i64x_info.ImmAllZerosV)),
8296 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
// Truncating variants: the unmasked pattern matches the X86any_* node while
// the vselect patterns match the plain X86cvttp2si / X86cvttp2ui nodes.
8298 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8299 (VCVTTPS2QQZ128rm addr:$src)>;
8300 def : Pat<(v2i64 (vselect VK2WM:$mask,
8301 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8303 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8304 def : Pat<(v2i64 (vselect VK2WM:$mask,
8305 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8306 v2i64x_info.ImmAllZerosV)),
8307 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8309 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8310 (VCVTTPS2UQQZ128rm addr:$src)>;
8311 def : Pat<(v2i64 (vselect VK2WM:$mask,
8312 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8314 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8315 def : Pat<(v2i64 (vselect VK2WM:$mask,
8316 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8317 v2i64x_info.ImmAllZerosV)),
8318 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
// Fold an X86vzload64 (typed v2i64, bitcast to v4i32 with bc_v4i32) into the
// memory forms of the 128-bit (U)DQ -> PD conversions: one unmasked pattern
// and two vselect-based masked patterns (rmk / rmkz) per conversion.
8321 let Predicates = [HasVLX] in {
8322 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8323 (VCVTDQ2PDZ128rm addr:$src)>;
8324 def : Pat<(v2f64 (vselect VK2WM:$mask,
8325 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8327 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328 def : Pat<(v2f64 (vselect VK2WM:$mask,
8329 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8330 v2f64x_info.ImmAllZerosV)),
8331 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8333 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8334 (VCVTUDQ2PDZ128rm addr:$src)>;
8335 def : Pat<(v2f64 (vselect VK2WM:$mask,
8336 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8338 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8339 def : Pat<(v2f64 (vselect VK2WM:$mask,
8340 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8341 v2f64x_info.ImmAllZerosV)),
8342 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
// Selection patterns for the 128-bit (U)QQ -> PS conversions. Both groups
// cover register, loadv2i64 and X86VBroadcastld64 sources, each in unmasked,
// $src0 pass-through (*k) and ImmAllZerosV (*kz) variants.
8345 let Predicates = [HasDQI, HasVLX] in {
8346 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8347 // patterns have been disabled with null_frag.
8348 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8349 (VCVTQQ2PSZ128rr VR128X:$src)>;
8350 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8352 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8353 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8355 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8357 def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8358 (VCVTQQ2PSZ128rm addr:$src)>;
8359 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8361 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8362 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8364 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8366 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8367 (VCVTQQ2PSZ128rmb addr:$src)>;
8368 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8369 (v4f32 VR128X:$src0), VK2WM:$mask),
8370 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8371 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8372 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8373 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8375 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8376 // patterns have been disabled with null_frag.
8377 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8378 (VCVTUQQ2PSZ128rr VR128X:$src)>;
8379 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8381 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8382 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8384 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8386 def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8387 (VCVTUQQ2PSZ128rm addr:$src)>;
8388 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8390 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8391 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8393 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8395 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8396 (VCVTUQQ2PSZ128rmb addr:$src)>;
8397 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8398 (v4f32 VR128X:$src0), VK2WM:$mask),
8399 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8400 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8401 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8402 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8405 //===----------------------------------------------------------------------===//
8406 // Half precision conversion instructions
8407 //===----------------------------------------------------------------------===//
// avx512_cvtph2ps - register (rr) and memory (rm) forms of vcvtph2ps
// (opcode 0x13), both built with AVX512_maskable and matching X86cvtph2ps.
//   _dest    - type info of the result; supplies the destination RC.
//   _src     - type info of the source; supplies the source RC and VT.
//   x86memop - memory operand used by the rm form.
//   ld_frag  - load fragment wrapped by the rm pattern.
//   sched    - scheduling class; the rm form uses sched.Folded.
// Everything defined here is marked as using MXCSR and as possibly raising
// an FP exception.
8409 let Uses = [MXCSR], mayRaiseFPException = 1 in
8410 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8411 X86MemOperand x86memop, PatFrag ld_frag,
8412 X86FoldableSchedWrite sched> {
8413 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8414 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8415 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8416 T8PD, Sched<[sched]>;
8417 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8418 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8419 (X86cvtph2ps (_src.VT
8420 (ld_frag addr:$src)))>,
8421 T8PD, Sched<[sched.Folded]>;
// avx512_cvtph2ps_sae - register-only {sae} form (rrb) of vcvtph2ps. It
// matches X86cvtph2psSAE, is tagged EVEX_B and is marked as using MXCSR.
8424 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8425 X86FoldableSchedWrite sched> {
8426 let Uses = [MXCSR] in
8427 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8428 (ins _src.RC:$src), "vcvtph2ps",
8429 "{sae}, $src", "$src, {sae}",
8430 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8431 T8PD, EVEX_B, Sched<[sched]>;
// 512-bit vcvtph2ps: v16i16 source (f256mem for the load form) to v16f32
// result. Combines the base forms with the {sae} form; requires HasAVX512.
8434 let Predicates = [HasAVX512] in
8435 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8437 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8438 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
// 256-bit and 128-bit vcvtph2ps, available only with HasVLX. The 256-bit form
// loads through f128mem and the 128-bit form through f64mem; neither gets an
// {sae} variant here.
8440 let Predicates = [HasVLX] in {
8441 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8442 load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8443 EVEX_CD8<32, CD8VH>;
8444 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8445 load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8446 EVEX_CD8<32, CD8VH>;
8448 // Pattern match vcvtph2ps of a scalar i64 load.
// Two spellings of a 64-bit load feeding the conversion are folded into the
// rm form: an X86vzload64, and a scalar_to_vector of a loadi64.
8449 def : Pat<(v4f32 (X86cvtph2ps (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
8450 (VCVTPH2PSZ128rm addr:$src)>;
8451 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8452 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8453 (VCVTPH2PSZ128rm addr:$src)>;
// avx512_cvtps2ph - vcvtps2ph (opcode 0x1D) with an 8-bit immediate operand
// ($src2).
//   _dest    - type info of the result.
//   _src     - type info of the source.
//   x86memop - memory operand of the store forms.
//   RR / MR  - scheduling classes for the register and store forms.
// Defines:
//   rr   - unmasked register form, matches X86cvtps2ph.
//   rrk  - merge-masked register form ($src0 tied to $dst), matches
//          X86mcvtps2ph with $src0 as passthru.
//   rrkz - zero-masked register form, matches X86mcvtps2ph with
//          _dest.ImmAllZerosV as passthru.
//   mr   - store form, no pattern attached here.
//   mrk  - masked store form, no pattern; marked NotMemoryFoldable.
8456 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8457 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8458 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8459 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8460 (ins _src.RC:$src1, i32u8imm:$src2),
8461 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8462 [(set _dest.RC:$dst,
8463 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8465 let Constraints = "$src0 = $dst" in
8466 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8467 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8468 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8469 [(set _dest.RC:$dst,
8470 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8471 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8472 Sched<[RR]>, EVEX_K;
8473 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8474 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8475 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8476 [(set _dest.RC:$dst,
8477 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8478 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8479 Sched<[RR]>, EVEX_KZ;
// Store forms carry no selection pattern, so their store behaviour is stated
// explicitly via mayStore.
8480 let hasSideEffects = 0, mayStore = 1 in {
8481 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8482 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8483 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
// NOTE(review): mrk takes its mask class from _dest.KRCWM while rrk/rrkz use
// _src.KRCWM. For VCVTPS2PHZ128 (v8i16x_info dest, v4f32x_info src) the two
// have different element counts - confirm this is intended.
8485 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8486 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8487 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8488 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
// avx512_cvtps2ph_sae - register-only {sae} form (rrb) of vcvtps2ph. It is
// built with AVX512_maskable_in_asm and an empty pattern list, so nothing in
// this multiclass selects it from the DAG.
8493 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8495 let hasSideEffects = 0, Uses = [MXCSR] in
8496 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8497 (outs _dest.RC:$dst),
8498 (ins _src.RC:$src1, i32u8imm:$src2),
8499 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8500 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
// vcvtps2ph instantiations. The 512-bit form (with its {sae} variant) needs
// HasAVX512; the 256-bit and 128-bit forms additionally need HasVLX. The
// store operand is f256mem / f128mem / f64mem for the 512 / 256 / 128-bit
// forms respectively.
8503 let Predicates = [HasAVX512] in {
8504 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8505 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8506 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8507 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8508 let Predicates = [HasVLX] in {
8509 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8510 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8511 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8512 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8513 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8514 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
// Fold a store of the conversion result into the mr forms. For the 128-bit
// form only element 0 of the result, viewed as f64 or i64, is stored.
8517 def : Pat<(store (f64 (extractelt
8518 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8519 (iPTR 0))), addr:$dst),
8520 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8521 def : Pat<(store (i64 (extractelt
8522 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, timm:$src2))),
8523 (iPTR 0))), addr:$dst),
8524 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
// The 256-bit and 512-bit forms store the whole v8i16 / v16i16 result.
8525 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8526 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8527 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8528 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8531 // Patterns for matching conversions from float to half-float and vice versa.
8532 let Predicates = [HasVLX] in {
8533 // Use MXCSR.RC for rounding instead of explicitly specifying the default
8534 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
8535 // configurations we support (the default). However, falling back to MXCSR is
8536 // more consistent with other instructions, which are always controlled by it.
8537 // It's encoded as 0b100.
// fp_to_f16: copy the scalar into VR128X, convert with VCVTPS2PHZ128rr
// (immediate 4), move the result to a GPR with VMOVPDI2DIZrr and take its
// sub_16bit sub-register.
8538 def : Pat<(fp_to_f16 FR32X:$src),
8539 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
8540 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
// f16_to_fp: widen the GR16 with MOVSX32rr16, copy it into VR128X, convert
// with VCVTPH2PSZ128rr and copy the result to FR32X.
8542 def : Pat<(f16_to_fp GR16:$src),
8543 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8544 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
// Round trip f32 -> f16 -> f32: the two conversions are chained directly on
// VR128X without going through a GPR.
8546 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
8547 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
8548 (v8i16 (VCVTPS2PHZ128rr
8549 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
8552 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Defines a single register form (rrb) with an {sae} operand in the asm
// string and no selection pattern.
//   opc       - opcode byte.
//   _         - type info supplying the register class of both sources.
//   OpcodeStr - mnemonic.
//   d         - execution domain (not referenced in the lines visible here).
//   sched     - scheduling class, WriteFCom by default.
8553 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8554 string OpcodeStr, Domain d,
8555 X86FoldableSchedWrite sched = WriteFCom> {
8556 let hasSideEffects = 0, Uses = [MXCSR] in
8557 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8558 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8559 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// {sae} forms of the scalar compares. All four define EFLAGS and require
// HasAVX512; opcode 0x2E is used for vucomiss/vucomisd and 0x2F for
// vcomiss/vcomisd, with VEX_W added on the double-precision variants.
8562 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8563 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8564 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8565 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8566 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8567 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8568 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8569 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8570 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded scalar compares that define EFLAGS, built from the shared
// sse12_ord_cmp / sse12_ord_cmp_int multiclasses. The FR32X/FR64X forms use
// X86any_fcmp for the ucomis* opcodes (0x2E) and X86strict_fcmps for the
// comis* opcodes (0x2F).
8573 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8574 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8575 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8576 EVEX_CD8<32, CD8VT1>;
8577 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8578 "ucomisd", SSEPackedDouble>, PD, EVEX,
8579 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8580 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8581 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8582 EVEX_CD8<32, CD8VT1>;
8583 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8584 "comisd", SSEPackedDouble>, PD, EVEX,
8585 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Codegen-only variants operating on VR128X with the X86ucomi / X86comi
// nodes and the sse_load_f32 / sse_load_f64 memory fragments.
8586 let isCodeGenOnly = 1 in {
8587 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8588 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8589 EVEX_CD8<32, CD8VT1>;
8590 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8591 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8592 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8594 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8595 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8596 EVEX_CD8<32, CD8VT1>;
8597 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8598 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8599 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8603 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Two-source scalar forms built with AVX512_maskable_scalar:
//   rr - both sources in registers.
//   rm - second source from memory (_.IntScalarMemOp, matched through
//        _.ScalarIntMemCPat).
// Both apply OpNode to ($src1, $src2), require HasAVX512 and are marked as
// using MXCSR.
8604 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8605 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8606 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8607 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8608 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8609 "$src2, $src1", "$src1, $src2",
8610 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8611 EVEX_4V, VEX_LIG, Sched<[sched]>;
8612 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8613 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8614 "$src2, $src1", "$src1, $src2",
8615 (OpNode (_.VT _.RC:$src1),
8616 _.ScalarIntMemCPat:$src2)>, EVEX_4V, VEX_LIG,
8617 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar rcp14 (opcode 0x4D, X86rcp14s) and rsqrt14 (opcode 0x4F,
// X86rsqrt14s) instantiations for f32 and f64; the f64 variants add VEX_W.
8621 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8622 f32x_info>, EVEX_CD8<32, CD8VT1>,
8624 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8625 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8627 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8628 SchedWriteFRsqrt.Scl, f32x_info>,
8629 EVEX_CD8<32, CD8VT1>, T8PD;
8630 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8631 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8632 EVEX_CD8<64, CD8VT1>, T8PD;
8634 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// One-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full vector load (_.LdFrag).
//   mb - broadcast load (_.BroadcastLdFrag); the asm operand gets
//        _.BroadcastStr appended and the form is tagged EVEX_B.
8635 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8636 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8637 let ExeDomain = _.ExeDomain in {
8638 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8639 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8640 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8642 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8643 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8645 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8646 Sched<[sched.Folded, sched.ReadAfterFold]>;
8647 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8648 (ins _.ScalarMemOp:$src), OpcodeStr,
8649 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8651 (_.BroadcastLdFrag addr:$src)))>,
8652 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp14_p_vl_all - instantiates avx512_fp14_p for every packed width
// and element type: "ps"/"pd" suffixes are appended to OpcodeStr, the
// sched.ZMM / sched.YMM / sched.XMM entries are selected per width, and the
// pd variants add VEX_W. All instantiations are marked as using MXCSR.
8656 let Uses = [MXCSR] in
8657 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8658 X86SchedWriteWidths sched> {
8659 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8660 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8661 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8662 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8664 // Define only if AVX512VL feature is present.
8665 let Predicates = [HasVLX] in {
8666 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8667 OpNode, sched.XMM, v4f32x_info>,
8668 EVEX_V128, EVEX_CD8<32, CD8VF>;
8669 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8670 OpNode, sched.YMM, v8f32x_info>,
8671 EVEX_V256, EVEX_CD8<32, CD8VF>;
8672 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8673 OpNode, sched.XMM, v2f64x_info>,
8674 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8675 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8676 OpNode, sched.YMM, v4f64x_info>,
8677 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed rsqrt14 (opcode 0x4E) and rcp14 (opcode 0x4C) for all widths.
8681 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8682 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8684 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Two-source scalar forms built with AVX512_maskable_scalar:
//   r  - register sources, matches OpNode; tagged SIMD_EXC.
//   rb - register sources with {sae}, matches OpNodeSAE; tagged EVEX_B.
//   m  - second source from memory (_.IntScalarMemOp), matches OpNode;
//        tagged SIMD_EXC.
8685 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8686 SDNode OpNode, SDNode OpNodeSAE,
8687 X86FoldableSchedWrite sched> {
8688 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8689 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8690 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8691 "$src2, $src1", "$src1, $src2",
8692 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8693 Sched<[sched]>, SIMD_EXC;
8695 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8696 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8697 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8698 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8699 EVEX_B, Sched<[sched]>;
8701 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8702 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8703 "$src2, $src1", "$src1, $src2",
8704 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>,
8705 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// avx512_eri_s - instantiates avx512_fp28_s for f32 ("ss" suffix, SSZ) and
// f64 ("sd" suffix, SDZ, with VEX_W).
8709 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8710 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8711 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8712 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8713 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8714 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar instantiations of avx512_eri_s: vrcp28 (0xCB) and vrsqrt28 (0xCD)
// under HasERI, followed by vgetexp (0x43).
// NOTE(review): the end of the HasERI scope is not visible in these lines;
// confirm whether VGETEXP is meant to sit inside or outside it.
8717 let Predicates = [HasERI] in {
8718 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8719 SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8720 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8721 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8724 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8725 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8726 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// One-source packed forms built with AVX512_maskable:
//   r  - register source.
//   m  - full vector load (_.LdFrag).
//   mb - broadcast load (_.BroadcastLdFrag), tagged EVEX_B.
// All three are marked as using MXCSR and as possibly raising an FP
// exception.
8728 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8729 SDNode OpNode, X86FoldableSchedWrite sched> {
8730 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8731 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8732 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8733 (OpNode (_.VT _.RC:$src))>,
8736 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8737 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8739 (bitconvert (_.LdFrag addr:$src))))>,
8740 Sched<[sched.Folded, sched.ReadAfterFold]>;
8742 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8743 (ins _.ScalarMemOp:$src), OpcodeStr,
8744 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8746 (_.BroadcastLdFrag addr:$src)))>,
8747 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_fp28_p_sae - register-only {sae} form (rb) of a one-source packed
// operation; matches OpNode on the register source and is tagged EVEX_B.
8750 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8751 SDNode OpNode, X86FoldableSchedWrite sched> {
8752 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8753 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8754 (ins _.RC:$src), OpcodeStr,
8755 "{sae}, $src", "$src, {sae}",
8756 (OpNode (_.VT _.RC:$src))>,
8757 EVEX_B, Sched<[sched]>;
// avx512_eri - 512-bit packed forms only: combines avx512_fp28_p (OpNode)
// with avx512_fp28_p_sae (OpNodeSAE) for v16f32 ("ps") and v8f64 ("pd",
// with VEX_W), both using sched.ZMM.
8760 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8761 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8762 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8763 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8764 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8765 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8766 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8767 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_fp_unaryop_packed - 128-bit and 256-bit instantiations of
// avx512_fp28_p for "ps" and "pd" (pd adds VEX_W), complementing the
// 512-bit-only avx512_eri. No {sae} form is defined at these widths.
8770 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8771 SDNode OpNode, X86SchedWriteWidths sched> {
8772 // Define only if AVX512VL feature is present.
8773 let Predicates = [HasVLX] in {
8774 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8776 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8777 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8779 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8780 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8782 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8783 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8785 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed instantiations of avx512_eri: vrsqrt28 (0xCC), vrcp28 (0xCA) and
// vexp2 (0xC8) under HasERI, followed by vgetexp (0x42), which also pulls in
// the 128/256-bit forms through avx512_fp_unaryop_packed.
// NOTE(review): the end of the HasERI scope is not visible in these lines;
// confirm whether VGETEXP is meant to sit inside or outside it.
8789 let Predicates = [HasERI] in {
8790 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8791 SchedWriteFRsqrt>, EVEX;
8792 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8793 SchedWriteFRcp>, EVEX;
8794 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8795 SchedWriteFAdd>, EVEX;
8797 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8799 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8800 SchedWriteFRnd>, EVEX;
// avx512_sqrt_packed_round - register form (rb) of packed sqrt that takes an
// explicit rounding-control operand (AVX512RC:$rc) and matches X86fsqrtRnd.
// Tagged EVEX_B and EVEX_RC.
8802 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8803 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8804 let ExeDomain = _.ExeDomain in
8805 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8806 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8807 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8808 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// avx512_sqrt_packed - packed sqrt forms built with AVX512_maskable:
//   r  - register source, matches any_fsqrt.
//   m  - full vector load (_.LdFrag).
//   mb - broadcast load (_.BroadcastLdFrag), tagged EVEX_B.
// All three are marked as using MXCSR and as possibly raising an FP
// exception.
8811 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8812 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8813 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8814 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8815 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8816 (_.VT (any_fsqrt _.RC:$src))>, EVEX,
8818 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8819 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8821 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
8822 Sched<[sched.Folded, sched.ReadAfterFold]>;
8823 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8824 (ins _.ScalarMemOp:$src), OpcodeStr,
8825 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
8827 (_.BroadcastLdFrag addr:$src)))>,
8828 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// avx512_sqrt_packed_all - instantiates avx512_sqrt_packed for every packed
// width. "ps" variants use the sched.PS entries and the PS prefix; "pd"
// variants use the sched.PD entries, the PD prefix and VEX_W.
8832 let Uses = [MXCSR], mayRaiseFPException = 1 in
8833 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8834 X86SchedWriteSizes sched> {
8835 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8836 sched.PS.ZMM, v16f32_info>,
8837 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8838 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8839 sched.PD.ZMM, v8f64_info>,
8840 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8841 // Define only if AVX512VL feature is present.
8842 let Predicates = [HasVLX] in {
8843 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8844 sched.PS.XMM, v4f32x_info>,
8845 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8846 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8847 sched.PS.YMM, v8f32x_info>,
8848 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8849 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8850 sched.PD.XMM, v2f64x_info>,
8851 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8852 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8853 sched.PD.YMM, v4f64x_info>,
8854 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// avx512_sqrt_packed_all_round - explicit-rounding packed sqrt, instantiated
// only for the 512-bit v16f32 ("ps") and v8f64 ("pd") types.
8858 let Uses = [MXCSR] in
8859 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8860 X86SchedWriteSizes sched> {
8861 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8862 sched.PS.ZMM, v16f32_info>,
8863 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8864 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8865 sched.PD.ZMM, v8f64_info>,
8866 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// avx512_sqrt_scalar - scalar sqrt.
//   Name - instruction name prefix used to look up the codegen-only r / m
//          forms from the selection patterns at the end (Name#Zr, Name#Zm).
// Defines:
//   r_Int, m_Int - vector-register forms matching X86fsqrts.
//   rb_Int       - explicit-rounding form (AVX512RC:$rc) matching
//                  X86fsqrtRnds.
//   r, m         - codegen-only forms on the scalar class _.FRC, without
//                  patterns of their own.
8869 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8870 X86VectorVTInfo _, string Name> {
8871 let ExeDomain = _.ExeDomain in {
8872 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8873 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8874 "$src2, $src1", "$src1, $src2",
8875 (X86fsqrts (_.VT _.RC:$src1),
8876 (_.VT _.RC:$src2))>,
8877 Sched<[sched]>, SIMD_EXC;
8878 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8879 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8880 "$src2, $src1", "$src1, $src2",
8881 (X86fsqrts (_.VT _.RC:$src1),
8882 _.ScalarIntMemCPat:$src2)>,
8883 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8884 let Uses = [MXCSR] in
8885 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8886 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8887 "$rc, $src2, $src1", "$src1, $src2, $rc",
8888 (X86fsqrtRnds (_.VT _.RC:$src1),
8891 EVEX_B, EVEX_RC, Sched<[sched]>;
// Codegen-only scalar-register forms; selected only via the Pats below.
8893 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8894 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8895 (ins _.FRC:$src1, _.FRC:$src2),
8896 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8897 Sched<[sched]>, SIMD_EXC;
8899 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8900 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8901 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8902 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// any_fsqrt on a scalar register: the first source operand of the
// instruction is filled with IMPLICIT_DEF.
8906 let Predicates = [HasAVX512] in {
8907 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
8908 (!cast<Instruction>(Name#Zr)
8909 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// The load is folded into the instruction only under OptForSize.
8912 let Predicates = [HasAVX512, OptForSize] in {
8913 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
8914 (!cast<Instruction>(Name#Zm)
8915 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// avx512_sqrt_scalar_all - instantiates avx512_sqrt_scalar for f32 (SSZ, XS
// prefix) and f64 (SDZ, XD prefix, VEX_W). NAME#"SS" / NAME#"SD" is passed
// as the Name used by the scalar selection patterns.
8919 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
8920 X86SchedWriteSizes sched> {
8921 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
8922 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
8923 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
8924 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
// vsqrt (opcode 0x51): packed forms plus their explicit-rounding variants,
// then the scalar forms (VEX_LIG).
8927 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
8928 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
8930 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// avx512_rndscale_scalar - scalar rndscale with an 8-bit immediate ($src3).
// Defines:
//   r_Int  - vector-register form matching X86RndScales.
//   rb_Int - {sae} form matching X86RndScalesSAE, tagged EVEX_B.
//   m_Int  - memory form (_.IntScalarMemOp) matching X86RndScales.
//   r, m   - codegen-only forms on the scalar class _.FRC, selected via the
//            X86any_VRndScale patterns at the end.
8932 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
8933 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8934 let ExeDomain = _.ExeDomain in {
8935 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8936 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8937 "$src3, $src2, $src1", "$src1, $src2, $src3",
8938 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8939 (i32 timm:$src3)))>,
8940 Sched<[sched]>, SIMD_EXC;
8942 let Uses = [MXCSR] in
8943 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8944 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
8945 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
8946 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
8947 (i32 timm:$src3)))>, EVEX_B,
8950 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8951 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
8953 "$src3, $src2, $src1", "$src1, $src2, $src3",
8954 (_.VT (X86RndScales _.RC:$src1,
8955 _.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
8956 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Codegen-only scalar-register forms; selected only via the Pats below.
8958 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
8959 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8960 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
8961 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8962 []>, Sched<[sched]>, SIMD_EXC;
8965 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8966 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8967 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8968 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// X86any_VRndScale on a scalar register: the first source operand of the
// instruction is filled with IMPLICIT_DEF.
8972 let Predicates = [HasAVX512] in {
8973 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
8974 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8975 _.FRC:$src1, timm:$src2))>;
// The scalar load is folded into the instruction only under OptForSize.
8978 let Predicates = [HasAVX512, OptForSize] in {
8979 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
8980 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8981 addr:$src1, timm:$src2))>;
// Scalar rndscale instantiations: vrndscaless (opcode 0x0A, f32) and
// vrndscalesd (opcode 0x0B, f64, with VEX_W).
8985 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
8986 SchedWriteFRnd.Scl, f32x_info>,
8987 AVX512AIi8Base, EVEX_4V, VEX_LIG,
8988 EVEX_CD8<32, CD8VT1>;
8990 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
8991 SchedWriteFRnd.Scl, f64x_info>,
8992 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
8993 EVEX_CD8<64, CD8VT1>;
// avx512_masked_scalar - patterns that fold a scalar select around a unary
// scalar operation into the masked r_Intk / r_Intkz instruction forms.
//   OpNode        - the scalar operation applied to element 0 of $src2.
//   OpcPrefix     - instruction name without the leading "V".
//   Move          - node that inserts the selected scalar into $src1.
//   Mask          - DAG matched as the select condition.
//   OutMask       - DAG producing the mask operand of the instruction.
//   BasePredicate - predicate guarding both patterns.
//   ZeroFP        - presumably the zero passthru of the second (kz)
//                   pattern; its use is not visible in these lines - TODO
//                   confirm.
8995 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
8996 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
8997 dag OutMask, Predicate BasePredicate> {
8998 let Predicates = [BasePredicate] in {
// Merge-masking: the passthru is element 0 of $dst.
8999 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9000 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9001 (extractelt _.VT:$dst, (iPTR 0))))),
9002 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9003 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9005 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9006 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9008 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9009 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar fsqrt for f32 (SQRTSSZ, X86Movss) and f64 (SQRTSDZ,
// X86Movsd). The condition is matched as a v1i1 built from the truncated
// GR32 $mask, and $mask is copied into VK1WM for the instruction.
9013 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9014 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9015 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9016 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9017 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9018 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9021 //-------------------------------------------------
9022 // Integer truncate and extend operations
9023 //-------------------------------------------------
9025 // PatFrags that contain a select and a truncate op. They take operands in the
9026 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9027 // either to the multiclasses.
// Each fragment takes ($src, $src0, $mask) and expands to
// vselect($mask, <truncate>($src), $src0), where <truncate> is trunc,
// X86vtruncs or X86vtruncus respectively.
9028 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9029 (vselect node:$mask,
9030 (trunc node:$src), node:$src0)>;
9031 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9032 (vselect node:$mask,
9033 (X86vtruncs node:$src), node:$src0)>;
9034 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9035 (vselect node:$mask,
9036 (X86vtruncus node:$src), node:$src0)>;
// avx512_trunc_common - instruction forms shared by all integer truncates.
//   OpNode   - node matched by the unmasked register form.
//   MaskNode - node matched by the masked register forms; takes
//              (src, passthru, mask).
//   SrcInfo / DestInfo - type info of the source and the result.
//   x86memop - memory operand of the store forms.
// Defines:
//   rr   - unmasked register form.
//   rrk  - merge-masked register form ($src0 tied to $dst).
//   rrkz - zero-masked register form (passthru DestInfo.ImmAllZerosV).
//   mr   - store form, no pattern attached here.
//   mrk  - masked store form, no pattern; marked NotMemoryFoldable.
9038 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9039 SDPatternOperator MaskNode,
9040 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9041 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9042 let ExeDomain = DestInfo.ExeDomain in {
9043 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9044 (ins SrcInfo.RC:$src),
9045 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9046 [(set DestInfo.RC:$dst,
9047 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9048 EVEX, Sched<[sched]>;
9049 let Constraints = "$src0 = $dst" in
9050 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9051 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9052 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9053 [(set DestInfo.RC:$dst,
9054 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9055 (DestInfo.VT DestInfo.RC:$src0),
9056 SrcInfo.KRCWM:$mask))]>,
9057 EVEX, EVEX_K, Sched<[sched]>;
9058 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9059 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9060 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9061 [(set DestInfo.RC:$dst,
9062 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9063 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9064 EVEX, EVEX_KZ, Sched<[sched]>;
// Store forms carry no selection pattern, so their store behaviour is stated
// explicitly via mayStore.
9067 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9068 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9069 (ins x86memop:$dst, SrcInfo.RC:$src),
9070 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9071 EVEX, Sched<[sched.Folded]>;
9073 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9074 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9075 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9076 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9077 }//mayStore = 1, hasSideEffects = 0
// avx512_trunc_mr_lowering - selection patterns for the store forms.
// truncFrag(src, addr) selects the mr instruction and
// mtruncFrag(src, addr, mask) the mrk instruction; both are looked up by
// name as Name # SrcInfo.ZSuffix # "mr" / "mrk".
9080 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9081 X86VectorVTInfo DestInfo,
9082 PatFrag truncFrag, PatFrag mtruncFrag,
9085 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9086 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9087 addr:$dst, SrcInfo.RC:$src)>;
9089 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9090 SrcInfo.KRCWM:$mask),
9091 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9092 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// avx512_trunc - instantiates avx512_trunc_common together with
// avx512_trunc_mr_lowering for the 128-bit (Z128), 256-bit (Z256) and 512-bit
// (Z) source widths taken from VTSrcInfo. Each width gets its own OpNode,
// MaskNode, destination type info and memory operand. Z128/Z256 require
// HasVLX in addition to prd; Z requires only prd (HasAVX512 by default).
9095 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9096 SDNode OpNode256, SDNode OpNode512,
9097 SDPatternOperator MaskNode128,
9098 SDPatternOperator MaskNode256,
9099 SDPatternOperator MaskNode512,
9100 X86FoldableSchedWrite sched,
9101 AVX512VLVectorVTInfo VTSrcInfo,
9102 X86VectorVTInfo DestInfoZ128,
9103 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9104 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9105 X86MemOperand x86memopZ, PatFrag truncFrag,
9106 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9108 let Predicates = [HasVLX, prd] in {
9109 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9110 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9111 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9112 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9114 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9115 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9116 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9117 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9119 let Predicates = [prd] in
9120 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9121 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9122 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9123 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// avx512_trunc_qb - truncate from i64 elements (avx512vl_i64_info) with a
// v16i8x_info destination at every source width. All three widths use
// InVecNode / InVecMaskNode; OpNode and MaskNode are accepted but not passed
// on. Store operands are i16mem / i32mem / i64mem for the 128 / 256 / 512-bit
// sources.
9126 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9127 SDPatternOperator MaskNode,
9128 X86FoldableSchedWrite sched, PatFrag StoreNode,
9129 PatFrag MaskedStoreNode, SDNode InVecNode,
9130 SDPatternOperator InVecMaskNode> {
9131 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9132 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9133 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9134 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9135 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// avx512_trunc_qw - truncate from i64 elements (avx512vl_i64_info) with a
// v8i16x_info destination at every source width. The 128 and 256-bit sources
// use InVecNode / InVecMaskNode; the 512-bit source uses OpNode / MaskNode.
// Store operands are i32mem / i64mem / i128mem.
9138 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9139 SDPatternOperator MaskNode,
9140 X86FoldableSchedWrite sched, PatFrag StoreNode,
9141 PatFrag MaskedStoreNode, SDNode InVecNode,
9142 SDPatternOperator InVecMaskNode> {
9143 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9144 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9145 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9146 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9147 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// Truncation from i64-element vectors (avx512vl_i64_info) to dword vectors.
// Only the 128-bit slot uses InVecNode / InVecMaskNode; the 256- and 512-bit
// slots use OpNode / MaskNode. Destinations are v4i32x_info (128- and 256-bit
// sources) and v8i32x_info (512-bit source). Memory operands are
// i64mem / i128mem / i256mem; disp8 tuple is EVEX_CD8<32, CD8VH>.
9150 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9151 SDPatternOperator MaskNode,
9152 X86FoldableSchedWrite sched, PatFrag StoreNode,
9153 PatFrag MaskedStoreNode, SDNode InVecNode,
9154 SDPatternOperator InVecMaskNode> {
9155 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9156 InVecMaskNode, MaskNode, MaskNode, sched,
9157 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9158 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9159 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// Truncation from i32-element vectors (avx512vl_i32_info) to byte vectors.
// The 128- and 256-bit slots use InVecNode / InVecMaskNode; the 512-bit slot
// uses OpNode / MaskNode. Every destination is v16i8x_info. Memory operands
// are i32mem / i64mem / i128mem; disp8 tuple is EVEX_CD8<8, CD8VQ>.
9162 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9163 SDPatternOperator MaskNode,
9164 X86FoldableSchedWrite sched, PatFrag StoreNode,
9165 PatFrag MaskedStoreNode, SDNode InVecNode,
9166 SDPatternOperator InVecMaskNode> {
9167 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9168 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9169 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9170 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9171 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// Truncation from i32-element vectors (avx512vl_i32_info) to word vectors.
// Only the 128-bit slot uses InVecNode / InVecMaskNode; the 256- and 512-bit
// slots use OpNode / MaskNode. Destinations are v8i16x_info (128- and 256-bit
// sources) and v16i16x_info (512-bit source). Memory operands are
// i64mem / i128mem / i256mem; disp8 tuple is EVEX_CD8<16, CD8VH>.
9174 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9175 SDPatternOperator MaskNode,
9176 X86FoldableSchedWrite sched, PatFrag StoreNode,
9177 PatFrag MaskedStoreNode, SDNode InVecNode,
9178 SDPatternOperator InVecMaskNode> {
9179 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9180 InVecMaskNode, MaskNode, MaskNode, sched,
9181 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9182 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9183 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// Truncation from i16-element vectors (avx512vl_i16_info) to byte vectors.
// Only the 128-bit slot uses InVecNode / InVecMaskNode; the 256- and 512-bit
// slots use OpNode / MaskNode. Destinations are v16i8x_info (128- and 256-bit
// sources) and v32i8x_info (512-bit source). This is the only trunc wrapper
// here that overrides avx512_trunc's predicate argument, passing HasBWI.
9186 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9187 SDPatternOperator MaskNode,
9188 X86FoldableSchedWrite sched, PatFrag StoreNode,
9189 PatFrag MaskedStoreNode, SDNode InVecNode,
9190 SDPatternOperator InVecMaskNode> {
9191 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9192 InVecMaskNode, MaskNode, MaskNode, sched,
9193 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9194 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9195 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// i64 -> i8 truncating moves. Three flavours share avx512_trunc_qb and differ
// in opcode and node set: plain trunc (0x32), X86vtruncs (0x22) and
// X86vtruncus (0x12), each paired with the matching truncating-store and
// masked truncating-store fragments.
9198 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9199 WriteShuffle256, truncstorevi8,
9200 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9201 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9202 WriteShuffle256, truncstore_s_vi8,
9203 masked_truncstore_s_vi8, X86vtruncs,
9205 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9206 select_truncus, WriteShuffle256,
9207 truncstore_us_vi8, masked_truncstore_us_vi8,
9208 X86vtruncus, X86vmtruncus>;
// i64 -> i16 truncating moves via avx512_trunc_qw: plain trunc (0x34),
// X86vtruncs (0x24) and X86vtruncus (0x14), with the vi16 truncating-store
// fragments.
9210 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9211 WriteShuffle256, truncstorevi16,
9212 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9213 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9214 WriteShuffle256, truncstore_s_vi16,
9215 masked_truncstore_s_vi16, X86vtruncs,
9217 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9218 select_truncus, WriteShuffle256,
9219 truncstore_us_vi16, masked_truncstore_us_vi16,
9220 X86vtruncus, X86vmtruncus>;
// i64 -> i32 truncating moves via avx512_trunc_qd: plain trunc (0x35),
// X86vtruncs (0x25) and X86vtruncus (0x15), with the vi32 truncating-store
// fragments.
9222 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9223 WriteShuffle256, truncstorevi32,
9224 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9225 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9226 WriteShuffle256, truncstore_s_vi32,
9227 masked_truncstore_s_vi32, X86vtruncs,
9229 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9230 select_truncus, WriteShuffle256,
9231 truncstore_us_vi32, masked_truncstore_us_vi32,
9232 X86vtruncus, X86vmtruncus>;
// i32 -> i8 truncating moves via avx512_trunc_db: plain trunc (0x31),
// X86vtruncs (0x21) and X86vtruncus (0x11), with the vi8 truncating-store
// fragments.
9234 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9235 WriteShuffle256, truncstorevi8,
9236 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9237 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9238 WriteShuffle256, truncstore_s_vi8,
9239 masked_truncstore_s_vi8, X86vtruncs,
9241 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9242 select_truncus, WriteShuffle256,
9243 truncstore_us_vi8, masked_truncstore_us_vi8,
9244 X86vtruncus, X86vmtruncus>;
// i32 -> i16 truncating moves via avx512_trunc_dw: plain trunc (0x33),
// X86vtruncs (0x23) and X86vtruncus (0x13), with the vi16 truncating-store
// fragments.
9246 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9247 WriteShuffle256, truncstorevi16,
9248 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9249 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9250 WriteShuffle256, truncstore_s_vi16,
9251 masked_truncstore_s_vi16, X86vtruncs,
9253 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9254 select_truncus, WriteShuffle256,
9255 truncstore_us_vi16, masked_truncstore_us_vi16,
9256 X86vtruncus, X86vmtruncus>;
// i16 -> i8 truncating moves via avx512_trunc_wb (which passes HasBWI to
// avx512_trunc): plain trunc (0x30), X86vtruncs (0x20) and X86vtruncus
// (0x10), with the vi8 truncating-store fragments.
9258 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9259 WriteShuffle256, truncstorevi8,
9260 masked_truncstorevi8, X86vtrunc,
9262 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9263 WriteShuffle256, truncstore_s_vi8,
9264 masked_truncstore_s_vi8, X86vtruncs,
9266 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9267 select_truncus, WriteShuffle256,
9268 truncstore_us_vi8, masked_truncstore_us_vi8,
9269 X86vtruncus, X86vmtruncus>;
// With AVX512 but no VLX, 256-bit truncations are selected onto the 512-bit
// instruction: the ymm source is placed in the low half of an otherwise
// undefined zmm (INSERT_SUBREG into IMPLICIT_DEF at sub_ymm), truncated with
// the Zrr form, and the xmm result is taken with EXTRACT_SUBREG sub_xmm.
9271 let Predicates = [HasAVX512, NoVLX] in {
9272 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9273 (v8i16 (EXTRACT_SUBREG
9274 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9275 VR256X:$src, sub_ymm)))), sub_xmm))>;
9276 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9277 (v4i32 (EXTRACT_SUBREG
9278 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9279 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for v16i16 -> v16i8 when BWI is present but VLX is not:
// widen the ymm source into a zmm, use the 512-bit VPMOVWBZrr, and extract
// the low xmm.
9282 let Predicates = [HasBWI, NoVLX] in {
9283 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9284 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9285 VR256X:$src, sub_ymm))), sub_xmm))>;
9288 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selection patterns for a masked-truncate node OpNode from SrcInfo to
// DestInfo. InstrName is the base instruction record name; the first pattern
// selects its merge-masked form (InstrName#"rrk", taking a $src0 pass-through
// operand) and the second, which matches DestInfo.ImmAllZerosV as the
// pass-through, selects the zero-masked form (InstrName#"rrkz").
9289 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9290 X86VectorVTInfo DestInfo,
9291 X86VectorVTInfo SrcInfo> {
9292 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9294 SrcInfo.KRCWM:$mask)),
9295 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9296 SrcInfo.KRCWM:$mask,
9299 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9300 DestInfo.ImmAllZerosV,
9301 SrcInfo.KRCWM:$mask)),
9302 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// Masked v8i32 -> v8i16 truncation patterns on the 256-bit instructions
// (requires VLX), one per node flavour: X86vmtrunc, X86vmtruncs, X86vmtruncus.
9306 let Predicates = [HasVLX] in {
9307 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9308 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9309 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
// Masked truncation patterns on the 512-bit instructions: v16i32 -> v16i16,
// v16i32 -> v16i8 and v8i64 -> v8i16, each in the X86vmtrunc, X86vmtruncs
// and X86vmtruncus flavours.
9312 let Predicates = [HasAVX512] in {
9313 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9314 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9315 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9317 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9318 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9319 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9321 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9322 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9323 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Shared body of the vector extend instructions: defines a maskable
// register form (rr) and a maskable memory form (rm) for one vector width.
//   DestInfo / SrcInfo - result and source vector type descriptions.
//   x86memop           - memory operand class used by the rm form.
//   LdFrag             - extending-load fragment matched by the rm form.
//   OpNode             - extend node matched by the rr form.
9326 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9327 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9328 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9329 let ExeDomain = DestInfo.ExeDomain in {
// Register source: matches OpNode applied to a SrcInfo.VT register.
9330 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9331 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9332 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9333 EVEX, Sched<[sched]>;
// Memory source: matches LdFrag directly and uses the folded schedule class.
9335 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9336 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9337 (DestInfo.VT (LdFrag addr:$src))>,
9338 EVEX, Sched<[sched.Folded]>;
// Byte -> word extends. LdFrag defaults to the ExtTy#"extloadvi8" fragment.
// Z128 (v16i8 source, v8i16 result, i64mem) matches InVecNode; Z256 and Z
// match OpNode with i128mem / i256mem. Z128 and Z256 need VLX and BWI, Z
// needs BWI only.
9342 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9343 SDNode OpNode, SDNode InVecNode, string ExtTy,
9344 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9345 let Predicates = [HasVLX, HasBWI] in {
9346 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9347 v16i8x_info, i64mem, LdFrag, InVecNode>,
9348 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9350 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9351 v16i8x_info, i128mem, LdFrag, OpNode>,
9352 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9354 let Predicates = [HasBWI] in {
9355 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9356 v32i8x_info, i256mem, LdFrag, OpNode>,
9357 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// Byte -> dword extends. LdFrag defaults to the ExtTy#"extloadvi8" fragment.
// The source is always described as v16i8x_info. Z128 (i32mem) and Z256
// (i64mem) match InVecNode; Z (i128mem, v16i32 result) matches OpNode.
9361 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9362 SDNode OpNode, SDNode InVecNode, string ExtTy,
9363 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9364 let Predicates = [HasVLX, HasAVX512] in {
9365 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9366 v16i8x_info, i32mem, LdFrag, InVecNode>,
9367 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9369 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9370 v16i8x_info, i64mem, LdFrag, InVecNode>,
9371 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9373 let Predicates = [HasAVX512] in {
9374 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9375 v16i8x_info, i128mem, LdFrag, OpNode>,
9376 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// Byte -> qword extends. LdFrag defaults to the ExtTy#"extloadvi8" fragment.
// All three widths describe the source as v16i8x_info and match InVecNode
// (OpNode is not used here); memory operands are i16mem / i32mem / i64mem.
9380 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9381 SDNode OpNode, SDNode InVecNode, string ExtTy,
9382 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9383 let Predicates = [HasVLX, HasAVX512] in {
9384 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9385 v16i8x_info, i16mem, LdFrag, InVecNode>,
9386 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9388 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9389 v16i8x_info, i32mem, LdFrag, InVecNode>,
9390 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9392 let Predicates = [HasAVX512] in {
9393 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9394 v16i8x_info, i64mem, LdFrag, InVecNode>,
9395 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// Word -> dword extends. LdFrag defaults to the ExtTy#"extloadvi16" fragment.
// Z128 (v8i16 source, v4i32 result, i64mem) matches InVecNode; Z256 and Z
// match OpNode with i128mem / i256mem.
9399 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9400 SDNode OpNode, SDNode InVecNode, string ExtTy,
9401 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9402 let Predicates = [HasVLX, HasAVX512] in {
9403 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9404 v8i16x_info, i64mem, LdFrag, InVecNode>,
9405 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9407 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9408 v8i16x_info, i128mem, LdFrag, OpNode>,
9409 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9411 let Predicates = [HasAVX512] in {
9412 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9413 v16i16x_info, i256mem, LdFrag, OpNode>,
9414 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// Word -> qword extends. LdFrag defaults to the ExtTy#"extloadvi16" fragment.
// The source is always described as v8i16x_info. Z128 (i32mem) and Z256
// (i64mem) match InVecNode; Z (i128mem, v8i64 result) matches OpNode.
9418 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9419 SDNode OpNode, SDNode InVecNode, string ExtTy,
9420 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9421 let Predicates = [HasVLX, HasAVX512] in {
9422 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9423 v8i16x_info, i32mem, LdFrag, InVecNode>,
9424 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9426 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9427 v8i16x_info, i64mem, LdFrag, InVecNode>,
9428 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9430 let Predicates = [HasAVX512] in {
9431 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9432 v8i16x_info, i128mem, LdFrag, OpNode>,
9433 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// Dword -> qword extends. LdFrag defaults to the ExtTy#"extloadvi32" fragment.
// Z128 (v4i32 source, v2i64 result, i64mem) matches InVecNode; Z256 and Z
// match OpNode with i128mem / i256mem. Unlike the byte/word source variants,
// these definitions do not add VEX_WIG.
9437 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9438 SDNode OpNode, SDNode InVecNode, string ExtTy,
9439 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9441 let Predicates = [HasVLX, HasAVX512] in {
9442 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9443 v4i32x_info, i64mem, LdFrag, InVecNode>,
9444 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9446 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9447 v4i32x_info, i128mem, LdFrag, OpNode>,
9448 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9450 let Predicates = [HasAVX512] in {
9451 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9452 v8i32x_info, i256mem, LdFrag, OpNode>,
9453 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extending (opcodes 0x30-0x35, zext / zext_invec, ExtTy "z") and
// sign-extending (opcodes 0x20-0x25, sext / sext_invec, ExtTy "s") vector
// moves. ExtTy selects the default extending-load fragment inside each
// WriteShuffle256_* multiclass.
9457 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9458 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9459 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9460 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9461 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9462 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9464 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9465 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9466 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9467 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9468 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9469 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9472 // Patterns that we also need any extend versions of. aext_vector_inreg
9473 // is currently legalized to zext_vector_inreg.
// Load-folding patterns for full-vector extends: ExtOp applied to a whole
// vector load is selected onto the memory ("rm") form of the instruction
// named OpcPrefix # <size suffix> # <width>. Covers the 256-bit and 512-bit
// forms whose source is a complete loaded vector.
9474 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
// 256-bit results.
9476 let Predicates = [HasVLX, HasBWI] in {
9477 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9478 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9481 let Predicates = [HasVLX] in {
9482 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9483 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9485 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9486 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit results.
9490 let Predicates = [HasBWI] in {
9491 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9492 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9494 let Predicates = [HasAVX512] in {
9495 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9496 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9497 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9498 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9500 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9501 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9503 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9504 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Extends AVX512_pmovx_patterns_base with load-folding patterns for the
// in-register extend node InVecOp. Here only the low part of a 128-bit source
// vector is consumed, so the source appears as a bitcast of either a
// scalar_to_vector of a scalar load or an X86vzload; each such shape is
// selected onto the memory ("rm") form of the matching instruction.
9508 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
9510 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit results.
9512 let Predicates = [HasVLX, HasBWI] in {
9513 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9514 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9515 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9516 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9517 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9518 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9520 let Predicates = [HasVLX] in {
9521 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9522 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9523 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9524 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9526 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9527 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9529 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9530 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9531 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9532 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9533 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9534 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9536 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9537 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9538 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9539 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9541 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9542 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9543 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9544 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9545 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9546 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit results whose source occupies only part of a 128-bit vector.
9548 let Predicates = [HasVLX] in {
9549 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9550 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9551 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9552 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9554 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9555 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9556 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9557 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9559 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9560 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9561 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9562 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// 512-bit byte -> qword: eight source bytes, loaded as one i64.
9565 let Predicates = [HasAVX512] in {
9566 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9567 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
// Instantiate the load-folding patterns for the sign- and zero-extending
// instruction families.
9571 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9572 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9574 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9575 // ext+trunc aggressively making it impossible to legalize the DAG to this
9576 // pattern directly.
// v16i16 -> v16i8 without BWI is done in two steps: widen the words to
// dwords with VPMOVZXWD (register or folded-load form), then truncate the
// resulting v16i32 to bytes with VPMOVDB.
9577 let Predicates = [HasAVX512, NoBWI] in {
9578 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9579 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9580 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9581 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9584 //===----------------------------------------------------------------------===//
9585 // GATHER - SCATTER Operations
9587 // FIXME: Improve scheduling of gather/scatter instructions.
// Defines one gather instruction (the "rm" record).
//   _          - vector type description of the gathered data.
//   memop      - vector-indexed memory operand class.
//   GatherNode - pattern fragment matched by the instruction.
//   MaskRC     - write-mask register class; defaults to _.KRCWM.
// The instruction has two results: the data ($dst, tied to the pass-through
// input $src1 and marked earlyclobber) and the updated mask ($mask_wb, tied
// to the input $mask).
9588 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9589 X86MemOperand memop, PatFrag GatherNode,
9590 RegisterClass MaskRC = _.KRCWM> {
9591 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9592 ExeDomain = _.ExeDomain in
9593 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9594 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9595 !strconcat(OpcodeStr#_.Suffix,
9596 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9597 [(set _.RC:$dst, MaskRC:$mask_wb,
9598 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
9599 vectoraddr:$src2))]>, EVEX, EVEX_K,
9600 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers of 64-bit data elements (all records carry VEX_W). For each vector
// width a "D" record (opcode dopc, mnemonic OpcodeStr#"d", mgatherv*i32
// fragment) and a "Q" record (opcode qopc, mnemonic OpcodeStr#"q",
// mgatherv*i64 fragment) are defined. The data type is the same-width info
// from _ in every case; only the memory operand and fragment differ.
// The 256- and 128-bit records require VLX.
9603 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9604 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9605 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
9606 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
9607 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
9608 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
9609 let Predicates = [HasVLX] in {
9610 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9611 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
9612 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
9613 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
9614 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9615 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
9616 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9617 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gathers of 32-bit data elements (no VEX_W). The "D" records use the
// same-width data info from _. The "Q" records (mgatherv*i64 fragments) use
// the next narrower data info: _.info256 with EVEX_V512 and _.info128 with
// EVEX_V256. The 128-bit "Q" record also uses _.info128 but overrides the
// mask class with VK2WM, matching its mgatherv2i64 fragment.
// The 256- and 128-bit records require VLX.
9621 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9622 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9623 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
9624 mgatherv16i32>, EVEX_V512;
9625 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
9626 mgatherv8i64>, EVEX_V512;
9627 let Predicates = [HasVLX] in {
9628 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
9629 vy256xmem, mgatherv8i32>, EVEX_V256;
9630 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9631 vy128xmem, mgatherv4i64>, EVEX_V256;
9632 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
9633 vx128xmem, mgatherv4i32>, EVEX_V128;
9634 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
9635 vx64xmem, mgatherv2i64, VK2WM>,
// Floating-point gathers (opcodes 0x92 / 0x93) and integer gathers
// (opcodes 0x90 / 0x91). Each defm combines the 64-bit-element and
// 32-bit-element multiclasses, so one prefix yields both data sizes.
9641 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9642 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9644 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9645 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Defines one scatter instruction (the "mr" record).
//   _           - vector type description of the stored data.
//   memop       - vector-indexed memory operand class.
//   ScatterNode - pattern fragment matched by the instruction.
//   MaskRC      - write-mask register class; defaults to _.KRCWM.
// The only result is the updated mask ($mask_wb, tied to the input $mask);
// the instruction is marked mayStore.
9647 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9648 X86MemOperand memop, PatFrag ScatterNode,
9649 RegisterClass MaskRC = _.KRCWM> {
9651 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
9653 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9654 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9655 !strconcat(OpcodeStr#_.Suffix,
9656 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9657 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
9658 MaskRC:$mask, vectoraddr:$dst))]>,
9659 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9660 Sched<[WriteStore]>;
// Scatters of 64-bit data elements (all records carry VEX_W). Mirrors
// avx512_gather_q_pd: for each vector width a "D" record (dopc,
// mscatterv*i32 fragment) and a "Q" record (qopc, mscatterv*i64 fragment)
// are defined on the same-width data info. The 256- and 128-bit records
// require VLX.
9663 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9664 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9665 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
9666 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
9667 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
9668 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
9669 let Predicates = [HasVLX] in {
9670 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9671 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
9672 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
9673 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
9674 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9675 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
9676 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9677 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Scatters of 32-bit data elements (no VEX_W). Mirrors avx512_gather_d_ps:
// the "Q" records (mscatterv*i64 fragments) use the next narrower data info
// (_.info256 with EVEX_V512, _.info128 with EVEX_V256), and the 128-bit "Q"
// record overrides the mask class with VK2WM. The 256- and 128-bit records
// require VLX.
9681 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9682 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9683 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
9684 mscatterv16i32>, EVEX_V512;
9685 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
9686 mscatterv8i64>, EVEX_V512;
9687 let Predicates = [HasVLX] in {
9688 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
9689 vy256xmem, mscatterv8i32>, EVEX_V256;
9690 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9691 vy128xmem, mscatterv4i64>, EVEX_V256;
9692 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
9693 vx128xmem, mscatterv4i32>, EVEX_V128;
9694 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
9695 vx64xmem, mscatterv2i64, VK2WM>,
// Floating-point scatters (opcodes 0xA2 / 0xA3) and integer scatters
// (opcodes 0xA0 / 0xA1). Each defm combines the 64-bit-element and
// 32-bit-element multiclasses.
9700 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9701 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9703 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9704 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Defines one gather/scatter prefetch instruction (the "m" record). It takes
// a mask register and a vector-indexed memory operand, produces no results
// and has no selection pattern ([]). It is guarded by HasPFI and marked both
// mayLoad and mayStore.
//   F   - instruction format (an MRMnm form chosen by the caller).
//   KRC - mask register class for $mask.
9707 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9708 RegisterClass KRC, X86MemOperand memop> {
9709 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9710 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9711 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9712 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Gather/scatter prefetch instantiations. Opcode 0xC6 is used for the
// dword-index ("D") forms and 0xC7 for the qword-index ("Q") forms. The
// format selects the operation: MRM1m = gather PF0, MRM2m = gather PF1,
// MRM5m = scatter PF0, MRM6m = scatter PF1. The *PD forms add VEX_W. Only
// the *DPS forms use VK16WM; all others use VK8WM.
9715 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9716 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9718 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9719 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9721 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9722 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9724 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9725 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9727 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9728 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9730 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9731 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9733 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9734 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9736 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9737 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9739 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9740 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9742 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9743 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9745 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9746 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9748 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9749 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9751 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9752 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9754 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9755 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9757 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9758 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9760 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9761 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask-register to vector conversion for one vector type: the "rr" record
// reads a Vec.KRC mask register and matches a sign-extension of it to
// Vec.VT. The mnemonic is OpcodeStr with Vec.Suffix appended.
9763 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9764 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9765 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9766 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9767 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
9769 // Also need a pattern for anyextend.
9770 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
9771 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
// Instantiates cvt_by_vec_width for all three vector widths of VTInfo.
// The 512-bit record needs only the feature predicate prd; the 256- and
// 128-bit records additionally need VLX.
9774 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9775 string OpcodeStr, Predicate prd> {
9776 let Predicates = [prd] in
9777 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9779 let Predicates = [prd, HasVLX] in {
9780 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9781 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// Mask -> vector moves. All four share the "vpmovm2" stem; the element
// suffix is appended from the vector type inside cvt_by_vec_width. Byte and
// word forms use opcode 0x28 and need BWI; dword and qword forms use opcode
// 0x38 and need DQI. The word and qword forms add VEX_W.
9785 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9786 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9787 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9788 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector -> mask-register conversion for one vector type: the "rr" record
// reads a vector register and writes a _.KRC mask. It matches
// X86pcmpgtm(all-zeros, src), i.e. a per-element "0 > src" compare.
9790 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9791 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9792 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9793 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9794 EVEX, Sched<[WriteMove]>;
9797 // Use the 512-bit version to implement the 128/256-bit forms when VLX is not available (NoVLX).
// Selection pattern that implements the narrow "0 > src" mask compare with
// the wide instruction Name#"Zrr": the narrow source register is inserted
// into an undefined ExtendInfo.VT register at _.SubRegIdx, and the wide
// instruction's result is converted with COPY_TO_REGCLASS to yield _.KVT.
9798 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
9802 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9803 (_.KVT (COPY_TO_REGCLASS
9804 (!cast<Instruction>(Name#"Zrr")
9805 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9806 _.RC:$src, _.SubRegIdx)),
// Instantiates the vector -> mask conversion for all widths of VTInfo.
//   [prd]         - 512-bit instruction (Z).
//   [prd, HasVLX] - native 256- and 128-bit instructions (Z256, Z128).
//   [prd, NoVLX]  - no narrow instructions; the *_Alt patterns select the
//                   narrow operations onto the 512-bit instruction, passing
//                   VTInfo.info512 as the extended type.
9810 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9811 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9812 let Predicates = [prd] in
9813 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9816 let Predicates = [prd, HasVLX] in {
9817 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9819 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
9822 let Predicates = [prd, NoVLX] in {
9823 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9824 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// Vector -> mask moves. Byte and word forms use opcode 0x29 and need BWI;
// dword and qword forms use opcode 0x39 and need DQI. The word and qword
// forms add VEX_W.
9828 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9829 avx512vl_i8_info, HasBWI>;
9830 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9831 avx512vl_i16_info, HasBWI>, VEX_W;
9832 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9833 avx512vl_i32_info, HasDQI>;
9834 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9835 avx512vl_i64_info, HasDQI>, VEX_W;
9837 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9838 // is available, but BWI is not. We can't handle this in lowering because
9839 // a target independent DAG combine likes to combine sext and trunc.
// v16i1 -> v16i8 / v16i16 sign- and any-extension with DQI but no BWI:
// expand the mask to v16i32 with VPMOVM2D, then truncate to bytes (VPMOVDB)
// or words (VPMOVDW).
9840 let Predicates = [HasDQI, NoBWI] in {
9841 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9842 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9843 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9844 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9846 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
9847 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9848 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
9849 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// v8i1 -> v8i16 sign- and any-extension with DQI and VLX but no BWI:
// expand the mask to v8i32 with the 256-bit VPMOVM2D, then truncate to
// words with the 256-bit VPMOVDW.
9852 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9853 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9854 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9856 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
9857 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9860 //===----------------------------------------------------------------------===//
9861 // AVX-512 - COMPRESS and EXPAND
// Instruction definitions for a compress operation on one vector type:
//   rr  - maskable register-to-register form, declared with null_frag so it
//         carries no selection pattern of its own.
//   mr  - unmasked store to memory; pattern-less, so mayStore and
//         hasSideEffects are set explicitly by the preceding let.
//   mrk - store to memory under write-mask $mask.
9864 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9865 string OpcodeStr, X86FoldableSchedWrite sched> {
9866 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9867 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9868 (null_frag)>, AVX5128IBase,
9871 let mayStore = 1, hasSideEffects = 0 in
9872 def mr : AVX5128I<opc, MRMDestMem, (outs),
9873 (ins _.MemOp:$dst, _.RC:$src),
9874 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9875 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9876 Sched<[sched.Folded]>;
9878 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9879 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9880 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9882 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9883 Sched<[sched.Folded]>;
// Selection patterns for the compress instructions named Name#_.ZSuffix#<form>:
//   X86mCompressingStore                   -> mrk  (masked store)
//   X86compress with passthru $src0        -> rrk  (merge-masked)
//   X86compress with all-zeros passthru    -> rrkz (zero-masked)
9886 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9887 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9888 (!cast<Instruction>(Name#_.ZSuffix##mrk)
9889 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9891 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9892 (!cast<Instruction>(Name#_.ZSuffix##rrk)
9893 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9894 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9895 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9896 _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the compress instructions and their lowering patterns for all
// three vector widths of VTInfo: the 512-bit form (Z) under [Pred], and the
// 256-bit (Z256) and 128-bit (Z128) forms under [Pred, HasVLX].
// Pred defaults to HasAVX512.
9899 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9900 X86FoldableSchedWrite sched,
9901 AVX512VLVectorVTInfo VTInfo,
9902 Predicate Pred = HasAVX512> {
9903 let Predicates = [Pred] in
9904 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9905 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9907 let Predicates = [Pred, HasVLX] in {
9908 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9909 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9910 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9911 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9915 // FIXME: Is there a better scheduler class for VPCOMPRESS?
// The integer forms (D/Q) use opcode 0x8B and the FP forms (PS/PD) use 0x8A;
// the 64-bit element variants add VEX_W. All four are NotMemoryFoldable and
// use the default predicate of compress_by_elt_width.
9916 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
9917 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
9918 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
9919 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
9920 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
9921 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
9922 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
9923 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// Instruction definitions for an expand operation at one vector width (_):
//   rr - maskable register-source form (MRMSrcReg); declared with null_frag,
//        so no selection pattern is attached here.
//   rm - maskable memory-source form (MRMSrcMem), using
//        EVEX_CD8<_.EltSize, CD8VT1>.
9926 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
9927 string OpcodeStr, X86FoldableSchedWrite sched> {
9928 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9929 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9930 (null_frag)>, AVX5128IBase,
9933 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9934 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
9936 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
9937 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Selection patterns for the expand instructions named Name#_.ZSuffix#<form>:
//   X86mExpandingLoad with undef passthru      -> rmkz
//   X86mExpandingLoad with all-zeros passthru  -> rmkz
//   X86mExpandingLoad with register passthru   -> rmk
//   X86expand with register passthru           -> rrk
//   X86expand with all-zeros passthru          -> rrkz
9940 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9942 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
9943 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9944 _.KRCWM:$mask, addr:$src)>;
9946 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
9947 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
9948 _.KRCWM:$mask, addr:$src)>;
9950 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
9951 (_.VT _.RC:$src0))),
9952 (!cast<Instruction>(Name#_.ZSuffix##rmk)
9953 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
9955 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9956 (!cast<Instruction>(Name#_.ZSuffix##rrk)
9957 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9958 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9959 (!cast<Instruction>(Name#_.ZSuffix##rrkz)
9960 _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the expand instructions and their lowering patterns for all
// three vector widths of VTInfo: the 512-bit form (Z) under [Pred], and the
// 256-bit (Z256) and 128-bit (Z128) forms under [Pred, HasVLX].
// Pred defaults to HasAVX512.
9963 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
9964 X86FoldableSchedWrite sched,
9965 AVX512VLVectorVTInfo VTInfo,
9966 Predicate Pred = HasAVX512> {
9967 let Predicates = [Pred] in
9968 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
9969 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9971 let Predicates = [Pred, HasVLX] in {
9972 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
9973 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9974 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
9975 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
9979 // FIXME: Is there a better scheduler class for VPEXPAND?
// The integer forms (D/Q) use opcode 0x89 and the FP forms (PS/PD) use 0x88;
// the 64-bit element variants add VEX_W. All use the default predicate of
// expand_by_elt_width.
9980 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
9981 avx512vl_i32_info>, EVEX;
9982 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
9983 avx512vl_i64_info>, EVEX, VEX_W;
9984 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
9985 avx512vl_f32_info>, EVEX;
9986 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
9987 avx512vl_f64_info>, EVEX, VEX_W;
9989 // Handle instruction  reg_vec1 = op(reg_vec,imm)
//                                op(mem_vec,imm)
9991 //                                op(broadcast(eltVt),imm)
9992 // All instructions are created with FROUND_CURRENT.
// Defines three maskable forms, each taking an i32u8imm immediate matched as
// (i32 timm): rri (register source), rmi (full-vector load) and rmbi
// (broadcast load, EVEX_B). The mnemonic is OpcodeStr with _.Suffix appended.
// All forms are declared as using MXCSR and as possibly raising FP exceptions.
9993 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9994 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9995 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
9996 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9997 (ins _.RC:$src1, i32u8imm:$src2),
9998 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
9999 (OpNode (_.VT _.RC:$src1),
10000 (i32 timm:$src2))>, Sched<[sched]>;
10001 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10002 (ins _.MemOp:$src1, i32u8imm:$src2),
10003 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10004 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10005 (i32 timm:$src2))>,
10006 Sched<[sched.Folded, sched.ReadAfterFold]>;
10007 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10008 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10009 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10010 "${src1}"##_.BroadcastStr##", $src2",
10011 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10012 (i32 timm:$src2))>, EVEX_B,
10013 Sched<[sched.Folded, sched.ReadAfterFold]>;
10017 // Handle instruction reg_vec1 = op(reg_vec2,imm),{sae}
// Register-only {sae} form (rrib) of a unary packed FP operation with an
// immediate: one vector source plus an i32u8imm, encoded with EVEX_B.
// Unlike the non-SAE forms, only Uses = [MXCSR] is set here;
// mayRaiseFPException is not.
10018 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10019 SDNode OpNode, X86FoldableSchedWrite sched,
10020 X86VectorVTInfo _> {
10021 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10022 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10023 (ins _.RC:$src1, i32u8imm:$src2),
10024 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10025 "$src1, {sae}, $src2",
10026 (OpNode (_.VT _.RC:$src1),
10027 (i32 timm:$src2))>,
10028 EVEX_B, Sched<[sched]>;
// Instantiates a unary packed FP-with-immediate operation for all vector
// widths. The 512-bit form (Z, under [prd]) gets both the regular forms
// (OpNode) and the {sae} form (OpNodeSAE); the 128-bit and 256-bit forms
// (under [prd, HasVLX]) get only the regular forms.
10031 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10032 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10033 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10034 let Predicates = [prd] in {
10035 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10037 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10038 sched.ZMM, _.info512>, EVEX_V512;
10040 let Predicates = [prd, HasVLX] in {
10041 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10042 _.info128>, EVEX_V128;
10043 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10044 _.info256>, EVEX_V256;
10048 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10049 //                               op(reg_vec2,mem_vec,imm)
10050 //                               op(reg_vec2,broadcast(eltVt),imm)
10051 // All instructions are created with FROUND_CURRENT.
// Two-source packed FP forms with an i32u8imm immediate matched as (i32 timm):
// rri (register), rmi (full-vector load) and rmbi (broadcast load, EVEX_B).
// All forms are declared as using MXCSR and as possibly raising FP exceptions.
10052 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10053 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10054 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10055 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10056 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10057 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10058 (OpNode (_.VT _.RC:$src1),
10060 (i32 timm:$src3))>,
10062 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10063 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10064 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10065 (OpNode (_.VT _.RC:$src1),
10066 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10067 (i32 timm:$src3))>,
10068 Sched<[sched.Folded, sched.ReadAfterFold]>;
10069 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10070 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10071 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10072 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10073 (OpNode (_.VT _.RC:$src1),
10074 (_.VT (_.BroadcastLdFrag addr:$src2)),
10075 (i32 timm:$src3))>, EVEX_B,
10076 Sched<[sched.Folded, sched.ReadAfterFold]>;
10080 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10081 //                               op(reg_vec2,mem_vec,imm)
// The source and destination vector types may differ: both sources use
// SrcInfo, while the result, the masking and the execution domain come from
// DestInfo. The immediate is a u8imm matched as (i8 timm).
10082 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10083 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10084 X86VectorVTInfo SrcInfo>{
10085 let ExeDomain = DestInfo.ExeDomain in {
10086 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10087 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10088 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10089 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10090 (SrcInfo.VT SrcInfo.RC:$src2),
10091 (i8 timm:$src3)))>,
10093 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10094 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10095 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10096 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10097 (SrcInfo.VT (bitconvert
10098 (SrcInfo.LdFrag addr:$src2))),
10099 (i8 timm:$src3)))>,
10100 Sched<[sched.Folded, sched.ReadAfterFold]>;
10104 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10105 //                               op(reg_vec2,mem_vec,imm)
10106 //                               op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8, using the same type
// info (_) for both source and destination, and adds the broadcast-load form
// rmbi (EVEX_B).
10107 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10108 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10109 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10111 let ExeDomain = _.ExeDomain in
10112 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10113 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10114 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10115 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10116 (OpNode (_.VT _.RC:$src1),
10117 (_.VT (_.BroadcastLdFrag addr:$src2)),
10118 (i8 timm:$src3))>, EVEX_B,
10119 Sched<[sched.Folded, sched.ReadAfterFold]>;
10122 // Handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10123 //                                      op(reg_vec2,mem_scalar,imm)
// Built on AVX512_maskable_scalar. The memory form (rmi) takes an
// _.IntScalarMemOp operand matched through _.ScalarIntMemCPat. Both forms are
// declared as using MXCSR and as possibly raising FP exceptions.
10124 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10125 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10126 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10127 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10128 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10129 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10130 (OpNode (_.VT _.RC:$src1),
10132 (i32 timm:$src3))>,
10134 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10135 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10136 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10137 (OpNode (_.VT _.RC:$src1),
10138 (_.VT _.ScalarIntMemCPat:$src2),
10139 (i32 timm:$src3))>,
10140 Sched<[sched.Folded, sched.ReadAfterFold]>;
10144 // Handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form (rrib) of a two-source packed FP operation with an
// i32u8imm immediate, encoded with EVEX_B. Only Uses = [MXCSR] is set here;
// mayRaiseFPException is not.
10145 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10146 SDNode OpNode, X86FoldableSchedWrite sched,
10147 X86VectorVTInfo _> {
10148 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10149 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10150 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10151 OpcodeStr, "$src3, {sae}, $src2, $src1",
10152 "$src1, $src2, {sae}, $src3",
10153 (OpNode (_.VT _.RC:$src1),
10155 (i32 timm:$src3))>,
10156 EVEX_B, Sched<[sched]>;
10159 // Handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Register-only {sae} form of a scalar FP operation with an i32u8imm
// immediate, built on AVX512_maskable_scalar and encoded with EVEX_B. Note
// that this definition is named NAME#rrib explicitly, whereas the packed
// counterpart uses plain rrib.
10160 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10161 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10162 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10163 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10164 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10165 OpcodeStr, "$src3, {sae}, $src2, $src1",
10166 "$src1, $src2, {sae}, $src3",
10167 (OpNode (_.VT _.RC:$src1),
10169 (i32 timm:$src3))>,
10170 EVEX_B, Sched<[sched]>;
// Instantiates a two-source packed FP-with-immediate operation for all vector
// widths. The 512-bit form (Z, under [prd]) combines the regular forms
// (OpNode) with the {sae} form (OpNodeSAE); the 128-bit and 256-bit forms
// (under [prd, HasVLX]) instantiate avx512_fp_packed_imm.
10173 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10174 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10175 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10176 let Predicates = [prd] in {
10177 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10178 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10182 let Predicates = [prd, HasVLX] in {
10183 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10185 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Instantiates avx512_3Op_rm_imm8 (register and memory forms, no broadcast)
// for all vector widths, with separate destination and source type families.
// The 512-bit form is under [Pred]; the 128/256-bit forms are under
// [Pred, HasVLX]. Pred defaults to HasBWI.
10190 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10191 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10192 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10193 let Predicates = [Pred] in {
10194 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10195 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10197 let Predicates = [Pred, HasVLX] in {
10198 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10199 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10200 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10201 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Instantiates avx512_3Op_imm8 (register, memory and broadcast forms) for all
// vector widths of _. The 512-bit form is under [Pred]; the 128/256-bit forms
// are under [Pred, HasVLX]. Pred defaults to HasAVX512.
10205 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10206 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10207 Predicate Pred = HasAVX512> {
10208 let Predicates = [Pred] in {
10209 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10212 let Predicates = [Pred, HasVLX] in {
10213 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10215 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar FP-with-immediate operation: a single Z instantiation under [prd]
// that combines the regular forms (OpNode) with the {sae} form (OpNodeSAE).
// Both use the XMM scheduling class of sched.
10220 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10221 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10222 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10223 let Predicates = [prd] in {
10224 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10225 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Instantiates the unary packed FP-with-immediate operation for both element
// types: PS (f32, opcode opcPs, EVEX_CD8<32, CD8VF>) and PD (f64, opcode
// opcPd, EVEX_CD8<64, CD8VF>, VEX_W).
10229 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10230 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10231 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10232 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10233 opcPs, OpNode, OpNodeSAE, sched, prd>,
10234 EVEX_CD8<32, CD8VF>;
10235 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10236 opcPd, OpNode, OpNodeSAE, sched, prd>,
10237 EVEX_CD8<64, CD8VF>, VEX_W;
// Unary packed FP operations with an immediate. The two opcode arguments are
// the PS and PD opcodes, in that order: VREDUCE and VGETMANT share one opcode
// for both element types, VRNDSCALE uses 0x08 (PS) and 0x09 (PD).
// VREDUCE requires HasDQI; the other two require HasAVX512.
10240 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10241 X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>,
10242 AVX512AIi8Base, EVEX;
10243 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10244 X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>,
10245 AVX512AIi8Base, EVEX;
10246 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10247 X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>,
10248 AVX512AIi8Base, EVEX;
// Two-source FP operations with an immediate.
// Packed VRANGE (opcode 0x50, HasDQI): PD adds VEX_W and uses 64-bit CD8
// scaling, PS uses 32-bit CD8 scaling.
10250 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10251 0x50, X86VRange, X86VRangeSAE,
10252 SchedWriteFAdd, HasDQI>,
10253 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10254 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10255 0x50, X86VRange, X86VRangeSAE,
10256 SchedWriteFAdd, HasDQI>,
10257 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar variants: all are VEX_LIG with CD8VT1 scaling; the SD forms add
// VEX_W. VRANGES* (0x51) and VREDUCES* (0x57) require HasDQI, VGETMANTS*
// (0x27) requires HasAVX512.
10259 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10260 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10261 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10262 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10263 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10264 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10266 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10267 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10268 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10269 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10270 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10271 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10273 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10274 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10275 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10276 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10277 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10278 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// 128-bit-lane shuffle with an immediate, at one vector width. The X86Shuf128
// node is matched in CastInfo's vector type, while the instruction operands
// and masking use _. Defines rri (register), rmi (full-vector load through
// CastInfo.LdFrag) and rmbi (broadcast load, EVEX_B). EVEX2VEXOvrd is the
// base name for the EVEX2VEXOverride of the rri/rmi forms ("rr"/"rm" is
// appended); rmbi gets no override.
10280 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10281 X86FoldableSchedWrite sched,
10283 X86VectorVTInfo CastInfo,
10284 string EVEX2VEXOvrd> {
10285 let ExeDomain = _.ExeDomain in {
10286 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10287 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10288 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10290 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10291 (i8 timm:$src3)))))>,
10292 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10293 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10294 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10295 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10298 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10299 (CastInfo.LdFrag addr:$src2),
10300 (i8 timm:$src3)))))>,
10301 Sched<[sched.Folded, sched.ReadAfterFold]>,
10302 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10303 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10304 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10305 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10306 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10310 (X86Shuf128 _.RC:$src1,
10311 (_.BroadcastLdFrag addr:$src2),
10312 (i8 timm:$src3)))))>, EVEX_B,
10313 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates the 128-bit-lane shuffle at 512 bits (under HasAVX512, with an
// empty EVEX2VEX override name) and at 256 bits (under HasAVX512 + HasVLX,
// with the caller-supplied EVEX2VEXOvrd). No 128-bit form is defined here.
10317 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10318 AVX512VLVectorVTInfo _,
10319 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10320 string EVEX2VEXOvrd>{
10321 let Predicates = [HasAVX512] in
10322 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10323 _.info512, CastInfo.info512, "">, EVEX_V512;
10325 let Predicates = [HasAVX512, HasVLX] in
10326 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10327 _.info256, CastInfo.info256,
10328 EVEX2VEXOvrd>, EVEX_V256;
// FP shuffles use opcode 0x23 with the "VPERM2F128" override name; integer
// shuffles use opcode 0x43 with "VPERM2I128". In every case the cast type
// passed as CastInfo is the 64-bit element family (f64 or i64), including for
// the 32x4 variants. The 64x2 variants add VEX_W.
10331 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10332 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10333 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10334 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10335 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10336 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10337 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10338 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Register-source X86SubVBroadcast of a 128-bit value to 512 bits, selected
// as a 128-bit-lane shuffle. In each pattern the same XMM register, inserted
// into an IMPLICIT_DEF 512-bit value at sub_xmm, is passed as both shuffle
// sources. The v32i16 and v64i8 cases reuse VSHUFI32X4Zrri.
10340 let Predicates = [HasAVX512] in {
10341 // Provide fallback in case the load node that is used in the broadcast
10342 // patterns above is used by additional users, which prevents the pattern
10344 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10345 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10346 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10348 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10349 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10350 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10353 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10354 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10355 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10357 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10358 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10359 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10362 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10363 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10364 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10367 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10368 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10369 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// X86VAlign with a u8imm immediate (matched as i8 timm) at one vector width:
// rri (register), rmi (full-vector load) and rmbi (broadcast load, EVEX_B).
// The rri/rmi forms carry EVEX2VEXOverride names VPALIGNRrri/VPALIGNRrmi;
// rmbi carries none.
10373 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10374 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10375 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10376 // instantiation of this class.
10377 let ExeDomain = _.ExeDomain in {
10378 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10379 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10380 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10381 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10382 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10383 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10384 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10385 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10386 (_.VT (X86VAlign _.RC:$src1,
10387 (bitconvert (_.LdFrag addr:$src2)),
10388 (i8 timm:$src3)))>,
10389 Sched<[sched.Folded, sched.ReadAfterFold]>,
10390 EVEX2VEXOverride<"VPALIGNRrmi">;
10392 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10393 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10394 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10395 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10396 (X86VAlign _.RC:$src1,
10397 (_.VT (_.BroadcastLdFrag addr:$src2)),
10398 (i8 timm:$src3))>, EVEX_B,
10399 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_valign with opcode 0x03 for all vector widths: Z under
// HasAVX512, Z128 and Z256 under HasAVX512 + HasVLX. The 256-bit
// instantiation resets EVEX2VEXOverride to unset (?), leaving the override
// names from avx512_valign in effect only for the other widths.
10403 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10404 AVX512VLVectorVTInfo _> {
10405 let Predicates = [HasAVX512] in {
10406 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10407 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10409 let Predicates = [HasAVX512, HasVLX] in {
10410 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10411 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10412 // We can't really override the 256-bit version so change it back to unset.
10413 let EVEX2VEXOverride = ? in
10414 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10415 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// Element-granular align instructions for 32-bit (VALIGND) and 64-bit
// (VALIGNQ) elements, both scheduled as SchedWriteShuffle.
10419 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10420 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10421 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10422 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
// VPALIGNR (opcode 0x0F, X86PAlignr) on i8 vectors for both source and
// destination. It is built from avx512_common_3Op_rm_imm8, so it has no
// broadcast form and uses that multiclass's default predicate.
10425 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10426 SchedWriteShuffle, avx512vl_i8_info,
10427 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10429 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Each transform rescales the immediate and re-emits it as an i8 immediate:
//   ValignqImm32XForm - multiplies by 2 (used below for valignq -> valignd)
//   ValignqImm8XForm  - multiplies by 8 (used below for valignq -> vpalignr)
//   ValigndImm8XForm  - multiplies by 4 (used below for valignd -> vpalignr)
// NOTE(review): the product is passed to getI8Imm without a range check
// here; presumably callers only match immediates for which it fits - confirm.
10431 def ValignqImm32XForm : SDNodeXForm<timm, [{
10432 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10434 def ValignqImm8XForm : SDNodeXForm<timm, [{
10435 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10437 def ValigndImm8XForm : SDNodeXForm<timm, [{
10438 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Patterns for a masked align whose operation is expressed in type From but
// whose vselect mask is in type To. Each pattern matches a To-typed vselect
// over an OpNode computed in From.VT and selects the To-typed instruction
// OpcodeStr#<form>, with the immediate rewritten through ImmXForm:
//   register source -> rrik / rrikz
//   load source     -> rmik / rmikz
10441 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10442 X86VectorVTInfo From, X86VectorVTInfo To,
10443 SDNodeXForm ImmXForm> {
10444 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10446 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10449 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10450 To.RC:$src1, To.RC:$src2,
10451 (ImmXForm timm:$src3))>;
10453 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10455 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10458 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10459 To.RC:$src1, To.RC:$src2,
10460 (ImmXForm timm:$src3))>;
10462 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10464 (From.VT (OpNode From.RC:$src1,
10465 (From.LdFrag addr:$src2),
10468 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10469 To.RC:$src1, addr:$src2,
10470 (ImmXForm timm:$src3))>;
10472 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10474 (From.VT (OpNode From.RC:$src1,
10475 (From.LdFrag addr:$src2),
10478 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10479 To.RC:$src1, addr:$src2,
10480 (ImmXForm timm:$src3))>;
// Extends avx512_vpalign_mask_lowering with patterns whose second source is a
// broadcast load in type To: an unmasked From-typed operation selects
// OpcodeStr#"rmbi", and the To-typed vselect forms select rmbik / rmbikz.
// The immediate is rewritten through ImmXForm in all three.
10483 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10484 X86VectorVTInfo From,
10485 X86VectorVTInfo To,
10486 SDNodeXForm ImmXForm> :
10487 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10488 def : Pat<(From.VT (OpNode From.RC:$src1,
10489 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10491 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10492 (ImmXForm timm:$src3))>;
10494 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10496 (From.VT (OpNode From.RC:$src1,
10498 (To.VT (To.BroadcastLdFrag addr:$src2))),
10501 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10502 To.RC:$src1, addr:$src2,
10503 (ImmXForm timm:$src3))>;
10505 def : Pat<(To.VT (vselect To.KRCWM:$mask,
10507 (From.VT (OpNode From.RC:$src1,
10509 (To.VT (To.BroadcastLdFrag addr:$src2))),
10512 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10513 To.RC:$src1, addr:$src2,
10514 (ImmXForm timm:$src3))>;
// Instantiations of the masked-align lowering patterns: 64-bit-element
// X86VAlign is mapped onto VALIGND (immediate doubled) at all three widths,
// and, with BWI, 128-bit valignq/valignd are mapped onto VPALIGNRZ128
// (immediate scaled by 8 or 4 respectively).
10517 let Predicates = [HasAVX512] in {
10518 // For 512-bit we lower to the widest element type we can. So we only need
10519 // to handle converting valignq to valignd.
10520 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10521 v16i32_info, ValignqImm32XForm>;
10524 let Predicates = [HasVLX] in {
10525 // For 128-bit we lower to the widest element type we can. So we only need
10526 // to handle converting valignq to valignd.
10527 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10528 v4i32x_info, ValignqImm32XForm>;
10529 // For 256-bit we lower to the widest element type we can. So we only need
10530 // to handle converting valignq to valignd.
10531 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10532 v8i32x_info, ValignqImm32XForm>;
10535 let Predicates = [HasVLX, HasBWI] in {
10536 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
// NOTE(review): only the 128-bit (VPALIGNRZ128) instantiations are visible
// here; no 256-bit counterpart appears in this block.
10537 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10538 v16i8x_info, ValignqImm8XForm>;
10539 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10540 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW (opcode 0x42, X86dbpsadbw): i8 sources producing an i16 result,
// register and memory forms only. Marked NotEVEX2VEXConvertible.
10543 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10544 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10545 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Unary vector operation at one vector width: rr (register source) and rm
// (full-vector load source, EVEX_CD8<_.EltSize, CD8VF>), both maskable and
// both matching OpNode on _.VT.
10547 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10548 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10549 let ExeDomain = _.ExeDomain in {
10550 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10551 (ins _.RC:$src1), OpcodeStr,
10553 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
10556 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10557 (ins _.MemOp:$src1), OpcodeStr,
10559 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10560 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10561 Sched<[sched.Folded]>;
// Extends avx512_unary_rm with the broadcast-load form rmb (EVEX_B), whose
// assembly operand is ${src1} followed by _.BroadcastStr.
10565 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10566 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10567 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10568 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10569 (ins _.ScalarMemOp:$src1), OpcodeStr,
10570 "${src1}"##_.BroadcastStr,
10571 "${src1}"##_.BroadcastStr,
10572 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10573 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10574 Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm (no broadcast form) for all vector widths of
// VTInfo: Z under [prd], Z256 and Z128 under [prd, HasVLX].
10577 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10578 X86SchedWriteWidths sched,
10579 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10580 let Predicates = [prd] in
10581 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10584 let Predicates = [prd, HasVLX] in {
10585 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10587 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Instantiates avx512_unary_rmb (including the broadcast form) for all vector
// widths of VTInfo: Z under [prd], Z256 and Z128 under [prd, HasVLX].
10592 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10593 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10595 let Predicates = [prd] in
10596 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10599 let Predicates = [prd, HasVLX] in {
10600 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10602 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword/qword variants of a unary operation, with broadcast forms: Q appends
// "q" to the mnemonic, uses opc_q and i64 vectors, and adds VEX_W; D appends
// "d", uses opc_d and i32 vectors.
10607 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10608 SDNode OpNode, X86SchedWriteWidths sched,
10610 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10611 avx512vl_i64_info, prd>, VEX_W;
10612 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10613 avx512vl_i32_info, prd>;
// Byte/word variants of a unary operation, without broadcast forms: W appends
// "w" and uses opc_w with i16 vectors; B appends "b" and uses opc_b with i8
// vectors. Both are VEX_WIG.
10616 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10617 SDNode OpNode, X86SchedWriteWidths sched,
10619 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10620 avx512vl_i16_info, prd>, VEX_WIG;
10621 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10622 avx512vl_i8_info, prd>, VEX_WIG;
// Instantiates a unary operation for all four integer element widths by
// combining avx512_unary_rm_vl_dq and avx512_unary_rm_vl_bw under NAME.
// Opcodes are supplied in b, w, d, q order.
10625 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10626 bits<8> opc_d, bits<8> opc_q,
10627 string OpcodeStr, SDNode OpNode,
10628 X86SchedWriteWidths sched> {
10629 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10631 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
// VPABS for all element widths; opcodes in b, w, d, q order are
// 0x1C, 0x1D, 0x1E, 0x1F, matched on the generic abs node.
10635 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10638 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// The narrow source is placed into an IMPLICIT_DEF v8i64 value at sub_ymm
// (256-bit) or sub_xmm (128-bit) before being fed to the 512-bit instruction.
10639 let Predicates = [HasAVX512, NoVLX] in {
10640 def : Pat<(v4i64 (abs VR256X:$src)),
10643 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10645 def : Pat<(v2i64 (abs VR128X:$src)),
10648 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10652 // Use 512bit version to implement 128/256 bit.
// Under [prd, NoVLX], a 256-bit or 128-bit OpNode is selected as the 512-bit
// instruction InstrStr#"Zrr": the narrow source is inserted into an
// IMPLICIT_DEF 512-bit value at the narrow type's SubRegIdx, and the same
// SubRegIdx is used again on the result (presumably to extract the narrow
// result - confirm against the full pattern).
10653 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10654 AVX512VLVectorVTInfo _, Predicate prd> {
10655 let Predicates = [prd, NoVLX] in {
10656 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10658 (!cast<Instruction>(InstrStr # "Zrr")
10659 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10660 _.info256.RC:$src1,
10661 _.info256.SubRegIdx)),
10662 _.info256.SubRegIdx)>;
10664 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10666 (!cast<Instruction>(InstrStr # "Zrr")
10667 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10668 _.info128.RC:$src1,
10669 _.info128.SubRegIdx)),
10670 _.info128.SubRegIdx)>;
// VPLZCNT: instantiates avx512_unary_rm_vl_dq with ctlz as the selection node.
// The D and Q forms both use opcode 0x44; the multiclass appends "d"/"q" to
// the "vplzcnt" mnemonic and marks the Q form VEX_W. Gated by HasCDI and
// scheduled with SchedWriteVecIMul.
10674 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10675 SchedWriteVecIMul, HasCDI>;
// VPCONFLICT: instantiates avx512_unary_rm_vl_dq with X86Conflict as the
// selection node. The D and Q forms both use opcode 0xC4; the predicate is
// HasCDI and the scheduling class is SchedWriteVecALU.
10677 // FIXME: Is there a better scheduler class for VPCONFLICT?
10678 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10679 SchedWriteVecALU, HasCDI>;
10681 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
// avx512_unary_lowering emits patterns under [prd, NoVLX] that place the
// 128/256-bit source into a 512-bit IMPLICIT_DEF via INSERT_SUBREG and select
// the "Zrr" form of the named instruction; here prd is HasCDI and the node is
// ctlz, once for the i64 and once for the i32 element type.
10682 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10683 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10685 //===---------------------------------------------------------------------===//
10686 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10687 //===---------------------------------------------------------------------===//
// VPOPCNT: instantiates avx512_unary_rm_vl_dq with ctpop as the selection
// node. The D and Q forms both use opcode 0x55; the predicate is HasVPOPCNTDQ
// and the scheduling class is SchedWriteVecALU.
10689 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10690 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10691 SchedWriteVecALU, HasVPOPCNTDQ>;
// VPOPCNT: use the 512-bit "Zrr" instruction for 128/256-bit ctpop when the
// target has HasVPOPCNTDQ but not VLX (avx512_unary_lowering adds NoVLX to the
// predicate list). One instantiation per element type (i64, i32).
10693 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10694 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10696 //===---------------------------------------------------------------------===//
10697 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10698 //===---------------------------------------------------------------------===//
10700 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10701 X86SchedWriteWidths sched> {
10702 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10703 avx512vl_f32_info, HasAVX512>, XS;
// VMOVSHDUP (opcode 0x16, node X86Movshdup) and VMOVSLDUP (opcode 0x12, node
// X86Movsldup). Both go through avx512_replicate, which instantiates
// avx512_unary_rm_vl on avx512vl_f32_info with the HasAVX512 predicate and the
// XS prefix. Both use the SchedWriteFShuffle scheduling class.
10706 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10707 SchedWriteFShuffle>;
10708 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10709 SchedWriteFShuffle>;
10711 //===----------------------------------------------------------------------===//
10712 // AVX-512 - MOVDDUP
10713 //===----------------------------------------------------------------------===//
10715 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10716 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10717 let ExeDomain = _.ExeDomain in {
10718 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10719 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10720 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10722 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10723 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10724 (_.VT (_.BroadcastLdFrag addr:$src))>,
10725 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10726 Sched<[sched.Folded]>;
10730 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10731 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10732 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10733 VTInfo.info512>, EVEX_V512;
10735 let Predicates = [HasAVX512, HasVLX] in {
10736 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10737 VTInfo.info256>, EVEX_V256;
10738 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10739 VTInfo.info128>, EVEX_V128;
10743 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10744 X86SchedWriteWidths sched> {
10745 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10746 avx512vl_f64_info>, XD, VEX_W;
10749 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10751 let Predicates = [HasVLX] in {
10752 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10753 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10754 def : Pat<(v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10755 (VMOVDDUPZ128rm addr:$src)>;
10756 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload64 addr:$src)))),
10757 (VMOVDDUPZ128rm addr:$src)>;
10759 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10760 (v2f64 VR128X:$src0)),
10761 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10762 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10763 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10765 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10767 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10768 (v2f64 VR128X:$src0)),
10769 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10770 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcastld64 addr:$src)),
10772 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10774 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10775 (v2f64 VR128X:$src0)),
10776 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
10777 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (v2f64 (simple_load addr:$src)))),
10779 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
10782 //===----------------------------------------------------------------------===//
10783 // AVX-512 - Unpack Instructions
10784 //===----------------------------------------------------------------------===//
10786 let Uses = []<Register>, mayRaiseFPException = 0 in {
10787 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
10788 SchedWriteFShuffleSizes, 0, 1>;
10789 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
10790 SchedWriteFShuffleSizes>;
// Integer unpack instructions. Each low variant selects X86Unpckl and each
// high variant selects X86Unpckh; all use the SchedWriteShuffle class.
// The forms instantiated through the _b and _w multiclasses are gated by
// HasBWI.
10793 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10794 SchedWriteShuffle, HasBWI>;
10795 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10796 SchedWriteShuffle, HasBWI>;
10797 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10798 SchedWriteShuffle, HasBWI>;
10799 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10800 SchedWriteShuffle, HasBWI>;
// The forms instantiated through the _d and _q multiclasses only need
// HasAVX512.
10802 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10803 SchedWriteShuffle, HasAVX512>;
10804 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10805 SchedWriteShuffle, HasAVX512>;
10806 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10807 SchedWriteShuffle, HasAVX512>;
10808 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10809 SchedWriteShuffle, HasAVX512>;
10811 //===----------------------------------------------------------------------===//
10812 // AVX-512 - Extract & Insert Integer Instructions
10813 //===----------------------------------------------------------------------===//
10815 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10816 X86VectorVTInfo _> {
10817 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10818 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10819 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10820 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10822 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
10825 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10826 let Predicates = [HasBWI] in {
10827 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10828 (ins _.RC:$src1, u8imm:$src2),
10829 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10830 [(set GR32orGR64:$dst,
10831 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10832 EVEX, TAPD, Sched<[WriteVecExtract]>;
10834 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
10838 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10839 let Predicates = [HasBWI] in {
10840 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10841 (ins _.RC:$src1, u8imm:$src2),
10842 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10843 [(set GR32orGR64:$dst,
10844 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10845 EVEX, PD, Sched<[WriteVecExtract]>;
10847 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10848 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10849 (ins _.RC:$src1, u8imm:$src2),
10850 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10851 EVEX, TAPD, FoldGenData<NAME#rr>,
10852 Sched<[WriteVecExtract]>;
10854 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
10858 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10859 RegisterClass GRC> {
10860 let Predicates = [HasDQI] in {
10861 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10862 (ins _.RC:$src1, u8imm:$src2),
10863 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10865 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10866 EVEX, TAPD, Sched<[WriteVecExtract]>;
10868 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10869 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10870 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10871 [(store (extractelt (_.VT _.RC:$src1),
10872 imm:$src2),addr:$dst)]>,
10873 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10874 Sched<[WriteVecExtractSt]>;
// EVEX element-extract instructions.
// VPEXTRB/VPEXTRW operate on v16i8/v8i16 and are marked VEX_WIG; their
// multiclasses put the register forms under HasBWI.
// VPEXTRD/VPEXTRQ operate on v4i32/v2i64, extract into GR32/GR64, and their
// multiclass uses HasDQI; the Q form additionally sets VEX_W.
10878 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10879 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10880 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10881 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
10883 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10884 X86VectorVTInfo _, PatFrag LdFrag> {
10885 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10886 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10887 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10889 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10890 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
10893 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10894 X86VectorVTInfo _, PatFrag LdFrag> {
10895 let Predicates = [HasBWI] in {
10896 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10897 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10898 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10900 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10901 Sched<[WriteVecInsert]>;
10903 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
10907 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10908 X86VectorVTInfo _, RegisterClass GRC> {
10909 let Predicates = [HasDQI] in {
10910 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10911 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10912 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10914 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10915 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10917 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10918 _.ScalarLdFrag>, TAPD;
// EVEX element-insert instructions.
// VPINSRB (opcode 0x20, TAPD) and VPINSRW (opcode 0xC4, PD) select
// X86pinsrb/X86pinsrw, fold memory operands through extloadi8/extloadi16, and
// are marked VEX_WIG; avx512_insert_elt_bw places the register form under
// HasBWI.
10922 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
10923 extloadi8>, TAPD, VEX_WIG;
10924 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
10925 extloadi16>, PD, VEX_WIG;
// VPINSRD and VPINSRQ share opcode 0x22 and take the scalar from GR32/GR64;
// avx512_insert_elt_dq uses HasDQI, and the Q form is distinguished by VEX_W.
10926 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
10927 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
10929 //===----------------------------------------------------------------------===//
10930 // VSHUFPS - VSHUFPD Operations
10931 //===----------------------------------------------------------------------===//
10933 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
10934 AVX512VLVectorVTInfo VTInfo_FP>{
10935 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
10936 SchedWriteFShuffle>,
10937 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
10938 AVX512AIi8Base, EVEX_4V;
// VSHUFPS / VSHUFPD: avx512_shufp instantiates avx512_common_3Op_imm8 with
// opcode 0xC6 and the X86Shufp node on the floating-point type info (second
// argument). The PS form uses the PS prefix; the PD form uses PD plus VEX_W.
10941 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
10942 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
10944 //===----------------------------------------------------------------------===//
10945 // AVX-512 - Byte shift Left/Right
10946 //===----------------------------------------------------------------------===//
10948 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
10949 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
10950 Format MRMm, string OpcodeStr,
10951 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10952 def rr : AVX512<opc, MRMr,
10953 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
10954 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10955 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
10957 def rm : AVX512<opc, MRMm,
10958 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
10959 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10960 [(set _.RC:$dst,(_.VT (OpNode
10961 (_.VT (bitconvert (_.LdFrag addr:$src1))),
10962 (i8 timm:$src2))))]>,
10963 Sched<[sched.Folded, sched.ReadAfterFold]>;
10966 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
10967 Format MRMm, string OpcodeStr,
10968 X86SchedWriteWidths sched, Predicate prd>{
10969 let Predicates = [prd] in
10970 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10971 sched.ZMM, v64i8_info>, EVEX_V512;
10972 let Predicates = [prd, HasVLX] in {
10973 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10974 sched.YMM, v32i8x_info>, EVEX_V256;
10975 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
10976 sched.XMM, v16i8x_info>, EVEX_V128;
// VPSLLDQ / VPSRLDQ: byte-shift instructions instantiated for v64i8, v32i8
// and v16i8 by avx512_shift_packed_all. Both use opcode 0x73 and differ in
// the ModRM format (MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right
// shift) and in the selection node (X86vshldq vs. X86vshrdq). Gated by HasBWI.
10979 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
10980 SchedWriteShuffle, HasBWI>,
10981 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10982 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
10983 SchedWriteShuffle, HasBWI>,
10984 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
10986 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
10987 string OpcodeStr, X86FoldableSchedWrite sched,
10988 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
10989 let isCommutable = 1 in
10990 def rr : AVX512BI<opc, MRMSrcReg,
10991 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
10992 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
10993 [(set _dst.RC:$dst,(_dst.VT
10994 (OpNode (_src.VT _src.RC:$src1),
10995 (_src.VT _src.RC:$src2))))]>,
10997 def rm : AVX512BI<opc, MRMSrcMem,
10998 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
10999 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11000 [(set _dst.RC:$dst,(_dst.VT
11001 (OpNode (_src.VT _src.RC:$src1),
11002 (_src.VT (bitconvert
11003 (_src.LdFrag addr:$src2))))))]>,
11004 Sched<[sched.Folded, sched.ReadAfterFold]>;
11007 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11008 string OpcodeStr, X86SchedWriteWidths sched,
11010 let Predicates = [prd] in
11011 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11012 v8i64_info, v64i8_info>, EVEX_V512;
11013 let Predicates = [prd, HasVLX] in {
11014 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11015 v4i64x_info, v32i8x_info>, EVEX_V256;
11016 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11017 v2i64x_info, v16i8x_info>, EVEX_V128;
// VPSADBW: opcode 0xf6, selection node X86psadbw, gated by HasBWI.
// avx512_psadbw_packed_all pairs i8 source vectors with i64 destination
// vectors (v64i8 -> v8i64, v32i8 -> v4i64, v16i8 -> v2i64).
11021 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11022 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11024 // Transforms to swizzle an immediate to enable better matching when
11025 // memory operand isn't in the right place.
//
// VPTERNLOG321_imm8 rewrites the 8-bit truth-table immediate of a node whose
// first and last operands are exchanged relative to the instruction. The bit
// positions exchanged below are consistent with an immediate bit index of
// (op0 << 2) | (op1 << 1) | op2.
11026 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11027 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11028 uint8_t Imm = N->getZExtValue();
11029 // Swap bits 1/4 and 3/6.
// Mask 0xa5 keeps bits 0, 2, 5 and 7, which this swap leaves in place.
11030 uint8_t NewImm = Imm & 0xa5;
11031 if (Imm & 0x02) NewImm |= 0x10;
11032 if (Imm & 0x10) NewImm |= 0x02;
11033 if (Imm & 0x08) NewImm |= 0x40;
11034 if (Imm & 0x40) NewImm |= 0x08;
11035 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG213_imm8 rewrites the 8-bit truth-table immediate of a node whose
// first two operands are exchanged relative to the instruction (the patterns
// using it match (OpNode $src2, $src1, $src3)).
11037 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11038 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11039 uint8_t Imm = N->getZExtValue();
11040 // Swap bits 2/4 and 3/5.
// Mask 0xc3 keeps bits 0, 1, 6 and 7, which this swap leaves in place.
11041 uint8_t NewImm = Imm & 0xc3;
11042 if (Imm & 0x04) NewImm |= 0x10;
11043 if (Imm & 0x10) NewImm |= 0x04;
11044 if (Imm & 0x08) NewImm |= 0x20;
11045 if (Imm & 0x20) NewImm |= 0x08;
11046 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG132_imm8 rewrites the 8-bit truth-table immediate of a node whose
// last two operands are exchanged relative to the instruction (the patterns
// using it match (OpNode $src1, <mem $src3>, $src2)).
11048 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11049 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11050 uint8_t Imm = N->getZExtValue();
11051 // Swap bits 1/2 and 5/6.
// Mask 0x99 keeps bits 0, 3, 4 and 7, which this swap leaves in place.
11052 uint8_t NewImm = Imm & 0x99;
11053 if (Imm & 0x02) NewImm |= 0x04;
11054 if (Imm & 0x04) NewImm |= 0x02;
11055 if (Imm & 0x20) NewImm |= 0x40;
11056 if (Imm & 0x40) NewImm |= 0x20;
11057 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG231_imm8 rewrites the 8-bit truth-table immediate of a node whose
// operands appear in the order ($src2, $src3, $src1) so that it can be used
// with the instruction's ($src1, $src2, $src3) operand order.
// The immediate is indexed by (op0 << 2) | (op1 << 1) | op2, so the entry at
// old index (a, b, c) has to land at new index (c, a, b). For example an
// immediate of 0xF0 ("result = op0", i.e. $src2) must become 0xCC
// ("result = instruction operand 1").
11059 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11060 // Convert a VPTERNLOG immediate for operand order (2, 3, 1), i.e. with the instruction's first operand moved to the end.
11061 uint8_t Imm = N->getZExtValue();
11062 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
// Bits 0 and 7 (all three inputs equal) are unaffected by any permutation.
11063 uint8_t NewImm = Imm & 0x81;
11064 if (Imm & 0x02) NewImm |= 0x10;
11065 if (Imm & 0x04) NewImm |= 0x02;
11066 if (Imm & 0x08) NewImm |= 0x20;
11067 if (Imm & 0x10) NewImm |= 0x04;
11068 if (Imm & 0x20) NewImm |= 0x40;
11069 if (Imm & 0x40) NewImm |= 0x08;
11070 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG312_imm8 rewrites the 8-bit truth-table immediate of a node whose
// operands appear in the order ($src3, $src1, $src2) so that it can be used
// with the instruction's ($src1, $src2, $src3) operand order.
// The immediate is indexed by (op0 << 2) | (op1 << 1) | op2, so the entry at
// old index (a, b, c) has to land at new index (b, c, a). For example an
// immediate of 0xF0 ("result = op0", i.e. $src3) must become 0xAA
// ("result = instruction operand 2").
11072 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11073 // Convert a VPTERNLOG immediate for operand order (3, 1, 2), i.e. with the instruction's last operand moved to the beginning.
11074 uint8_t Imm = N->getZExtValue();
11075 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
// Bits 0 and 7 (all three inputs equal) are unaffected by any permutation.
11076 uint8_t NewImm = Imm & 0x81;
11077 if (Imm & 0x02) NewImm |= 0x04;
11078 if (Imm & 0x04) NewImm |= 0x10;
11079 if (Imm & 0x08) NewImm |= 0x40;
11080 if (Imm & 0x10) NewImm |= 0x02;
11081 if (Imm & 0x20) NewImm |= 0x08;
11082 if (Imm & 0x40) NewImm |= 0x20;
11083 return getI8Imm(NewImm, SDLoc(N));
11086 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11087 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11089 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11090 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11091 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11092 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11093 (OpNode (_.VT _.RC:$src1),
11096 (i8 timm:$src4)), 1, 1>,
11097 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11098 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11099 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11100 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11101 (OpNode (_.VT _.RC:$src1),
11103 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11104 (i8 timm:$src4)), 1, 0>,
11105 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11106 Sched<[sched.Folded, sched.ReadAfterFold]>;
11107 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11108 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11109 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11110 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11111 (OpNode (_.VT _.RC:$src1),
11113 (_.VT (_.BroadcastLdFrag addr:$src3)),
11114 (i8 timm:$src4)), 1, 0>, EVEX_B,
11115 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11116 Sched<[sched.Folded, sched.ReadAfterFold]>;
11117 }// Constraints = "$src1 = $dst"
11119 // Additional patterns for matching passthru operand in other positions.
11120 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11121 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11123 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11124 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11125 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11126 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11128 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11129 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11131 // Additional patterns for matching loads in other positions.
11132 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11133 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11134 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11135 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11136 def : Pat<(_.VT (OpNode _.RC:$src1,
11137 (bitconvert (_.LdFrag addr:$src3)),
11138 _.RC:$src2, (i8 timm:$src4))),
11139 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11140 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11142 // Additional patterns for matching zero masking with loads in other
11144 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11145 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11146 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11148 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11149 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11150 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11151 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11152 _.RC:$src2, (i8 timm:$src4)),
11154 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11155 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11157 // Additional patterns for matching masked loads with different
11159 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11160 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11161 _.RC:$src2, (i8 timm:$src4)),
11163 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11164 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11165 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11166 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11167 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11169 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11170 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11171 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11172 (OpNode _.RC:$src2, _.RC:$src1,
11173 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11175 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11176 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11177 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11178 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11179 _.RC:$src1, (i8 timm:$src4)),
11181 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11182 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11183 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11184 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11185 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11187 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11188 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11190 // Additional patterns for matching broadcasts in other positions.
11191 def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11192 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11193 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11194 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11195 def : Pat<(_.VT (OpNode _.RC:$src1,
11196 (_.BroadcastLdFrag addr:$src3),
11197 _.RC:$src2, (i8 timm:$src4))),
11198 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11199 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11201 // Additional patterns for matching zero masking with broadcasts in other
11203 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11204 (OpNode (_.BroadcastLdFrag addr:$src3),
11205 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11207 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11208 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11209 (VPTERNLOG321_imm8 timm:$src4))>;
11210 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11211 (OpNode _.RC:$src1,
11212 (_.BroadcastLdFrag addr:$src3),
11213 _.RC:$src2, (i8 timm:$src4)),
11215 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11216 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11217 (VPTERNLOG132_imm8 timm:$src4))>;
11219 // Additional patterns for matching masked broadcasts with different
11221 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11222 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11223 _.RC:$src2, (i8 timm:$src4)),
11225 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11226 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11227 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11228 (OpNode (_.BroadcastLdFrag addr:$src3),
11229 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11231 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11232 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11233 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11234 (OpNode _.RC:$src2, _.RC:$src1,
11235 (_.BroadcastLdFrag addr:$src3),
11236 (i8 timm:$src4)), _.RC:$src1)),
11237 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11238 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11239 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11240 (OpNode _.RC:$src2,
11241 (_.BroadcastLdFrag addr:$src3),
11242 _.RC:$src1, (i8 timm:$src4)),
11244 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11245 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11246 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11247 (OpNode (_.BroadcastLdFrag addr:$src3),
11248 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11250 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11251 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11254 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11255 AVX512VLVectorVTInfo _> {
11256 let Predicates = [HasAVX512] in
11257 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11258 _.info512, NAME>, EVEX_V512;
11259 let Predicates = [HasAVX512, HasVLX] in {
11260 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11261 _.info128, NAME>, EVEX_V128;
11262 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11263 _.info256, NAME>, EVEX_V256;
// VPTERNLOGD / VPTERNLOGQ: avx512_common_ternlog instantiates the 512-bit
// form under HasAVX512 and the 128/256-bit forms under HasAVX512 + HasVLX,
// all with opcode 0x25 and the X86vpternlog node. The D form uses the i32
// type info; the Q form uses the i64 type info and adds VEX_W.
11267 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11268 avx512vl_i32_info>;
11269 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11270 avx512vl_i64_info>, VEX_W;
11272 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
11273 let Predicates = [HasVLX] in {
11274 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11276 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11278 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11279 (loadv16i8 addr:$src3), (i8 timm:$src4))),
11280 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11282 def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11283 VR128X:$src1, (i8 timm:$src4))),
11284 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11285 (VPTERNLOG321_imm8 timm:$src4))>;
11286 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11287 VR128X:$src2, (i8 timm:$src4))),
11288 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11289 (VPTERNLOG132_imm8 timm:$src4))>;
11291 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11293 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11295 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11296 (loadv8i16 addr:$src3), (i8 timm:$src4))),
11297 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11299 def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11300 VR128X:$src1, (i8 timm:$src4))),
11301 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11302 (VPTERNLOG321_imm8 timm:$src4))>;
11303 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11304 VR128X:$src2, (i8 timm:$src4))),
11305 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11306 (VPTERNLOG132_imm8 timm:$src4))>;
11308 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11310 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11312 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11313 (loadv32i8 addr:$src3), (i8 timm:$src4))),
11314 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11316 def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11317 VR256X:$src1, (i8 timm:$src4))),
11318 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11319 (VPTERNLOG321_imm8 timm:$src4))>;
11320 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11321 VR256X:$src2, (i8 timm:$src4))),
11322 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11323 (VPTERNLOG132_imm8 timm:$src4))>;
11325 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11327 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11329 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11330 (loadv16i16 addr:$src3), (i8 timm:$src4))),
11331 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11333 def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11334 VR256X:$src1, (i8 timm:$src4))),
11335 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11336 (VPTERNLOG321_imm8 timm:$src4))>;
11337 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11338 VR256X:$src2, (i8 timm:$src4))),
11339 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11340 (VPTERNLOG132_imm8 timm:$src4))>;
11343 let Predicates = [HasAVX512] in {
11344 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11346 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11348 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11349 (loadv64i8 addr:$src3), (i8 timm:$src4))),
11350 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11352 def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11353 VR512:$src1, (i8 timm:$src4))),
11354 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11355 (VPTERNLOG321_imm8 timm:$src4))>;
11356 def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11357 VR512:$src2, (i8 timm:$src4))),
11358 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11359 (VPTERNLOG132_imm8 timm:$src4))>;
11361 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11363 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11365 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11366 (loadv32i16 addr:$src3), (i8 timm:$src4))),
11367 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11369 def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11370 VR512:$src1, (i8 timm:$src4))),
11371 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11372 (VPTERNLOG321_imm8 timm:$src4))>;
11373 def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11374 VR512:$src2, (i8 timm:$src4))),
11375 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11376 (VPTERNLOG132_imm8 timm:$src4))>;
11379 // Patterns to implement vnot using vpternlog instead of creating all ones
11380 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11381 // so that the result is only dependent on src0. But we use the same source
11382 // for all operands to prevent a false dependency.
11383 // TODO: We should maybe have a more generalized algorithm for folding to
11385 let Predicates = [HasAVX512] in {
11386 def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11387 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11388 def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11389 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11390 def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11391 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11392 def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11393 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// 128/256-bit vnot when AVX512 is available but VLX is not. Each source is
// placed into an otherwise undefined v8i64 register (INSERT_SUBREG into
// IMPLICIT_DEF) for all three operands, with immediate 15.
// NOTE(review): the trailing sub_xmm / sub_ymm index presumably belongs to an
// operator that extracts the low part of the 512-bit result; that operator is
// not visible in this excerpt - confirm.
11396 let Predicates = [HasAVX512, NoVLX] in {
// 128-bit element types (sub_xmm).
11397 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11400 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11401 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11402 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11403 (i8 15)), sub_xmm)>;
11404 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11407 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11408 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11409 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11410 (i8 15)), sub_xmm)>;
11411 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11414 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11415 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11416 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11417 (i8 15)), sub_xmm)>;
11418 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11421 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11422 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11423 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11424 (i8 15)), sub_xmm)>;
// 256-bit element types (sub_ymm).
11426 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11429 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11430 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11431 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11432 (i8 15)), sub_ymm)>;
11433 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11436 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11437 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11438 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11439 (i8 15)), sub_ymm)>;
11440 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11443 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11444 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11445 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11446 (i8 15)), sub_ymm)>;
11447 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11450 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11451 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11452 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11453 (i8 15)), sub_ymm)>;
// 128/256-bit vnot with VLX: xor with all-ones is selected directly as
// VPTERNLOGQZ128rri / VPTERNLOGQZ256rri with immediate 15, using $src for all
// three register operands.
11456 let Predicates = [HasVLX] in {
11457 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11458 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11459 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11460 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11461 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11462 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11463 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11464 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11466 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11467 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11468 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11469 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11470 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11471 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11472 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11473 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11476 //===----------------------------------------------------------------------===//
11477 // AVX-512 - FixupImm
11478 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM forms for one vector width, selected from X86VFixupimm.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class (sched.Folded / sched.ReadAfterFold are used
//               for the memory forms).
//   _         - type info for the data operands and result.
//   TblVT     - type info used for the $src3 operand.
// Defines rri (register), rmi (full memory operand) and rmbi (broadcast
// memory operand, EVEX_B). $src1 is tied to $dst; all forms read MXCSR and
// are marked mayRaiseFPException.
11480 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11481 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11482 X86VectorVTInfo TblVT>{
11483 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11484 Uses = [MXCSR], mayRaiseFPException = 1 in {
11485 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11486 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11487 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11488 (X86VFixupimm (_.VT _.RC:$src1),
11490 (TblVT.VT _.RC:$src3),
11491 (i32 timm:$src4))>, Sched<[sched]>;
11492 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11493 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11494 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11495 (X86VFixupimm (_.VT _.RC:$src1),
11497 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11498 (i32 timm:$src4))>,
11499 Sched<[sched.Folded, sched.ReadAfterFold]>;
11500 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11501 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11502 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11503 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11504 (X86VFixupimm (_.VT _.RC:$src1),
11506 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11507 (i32 timm:$src4))>,
11508 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11509 } // Constraints = "$src1 = $dst"
// Extends avx512_fixupimm_packed (same parameters) with the rrib form: a
// register-register variant printed with "{sae}", selected from
// X86VFixupimmSAE and encoded with EVEX_B. Unlike the inherited forms, this
// let block sets Uses = [MXCSR] without mayRaiseFPException.
11512 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11513 X86FoldableSchedWrite sched,
11514 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11515 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11516 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11517 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11518 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11519 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11520 "$src2, $src3, {sae}, $src4",
11521 (X86VFixupimmSAE (_.VT _.RC:$src1),
11523 (TblVT.VT _.RC:$src3),
11524 (i32 timm:$src4))>,
11525 EVEX_B, Sched<[sched]>;
// Scalar VFIXUPIMM forms, guarded by HasAVX512, with $src1 tied to $dst.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic stem; _.Suffix is appended.
//   sched     - scheduling class.
//   _         - type info for the data operands and result.
//   _src3VT   - type info used for the $src3 operand.
// Defines rri (register, X86VFixupimms), rrib (register with "{sae}",
// X86VFixupimmSAEs, EVEX_B) and rmi (scalar load wrapped in scalar_to_vector).
11529 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11530 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11531 X86VectorVTInfo _src3VT> {
11532 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11533 ExeDomain = _.ExeDomain in {
11534 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11535 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11536 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11537 (X86VFixupimms (_.VT _.RC:$src1),
11539 (_src3VT.VT _src3VT.RC:$src3),
11540 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
// NOTE(review): rrib is a register form (MRMSrcReg) but is scheduled with
// sched.Folded / sched.ReadAfterFold, whereas rri above uses Sched<[sched]>.
// Confirm whether Sched<[sched]> was intended here.
11541 let Uses = [MXCSR] in
11542 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11543 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11544 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
11545 "$src2, $src3, {sae}, $src4",
11546 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11548 (_src3VT.VT _src3VT.RC:$src3),
11549 (i32 timm:$src4))>,
11550 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11551 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11552 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11553 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11554 (X86VFixupimms (_.VT _.RC:$src1),
11556 (_src3VT.VT (scalar_to_vector
11557 (_src3VT.ScalarLdFrag addr:$src3))),
11558 (i32 timm:$src4))>,
11559 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Instantiates packed VFIXUPIMM (opcode 0x54) at all three vector widths.
//   sched - per-width scheduling classes (ZMM / YMM / XMM members are used).
//   _Vec  - per-width type info for the data operands.
//   _Tbl  - per-width type info for the $src3 operand.
// Z (512-bit) requires HasAVX512 and includes the SAE form; Z128 / Z256
// additionally require HasVLX and have no SAE form.
11563 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11564 AVX512VLVectorVTInfo _Vec,
11565 AVX512VLVectorVTInfo _Tbl> {
11566 let Predicates = [HasAVX512] in
11567 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11568 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11569 EVEX_4V, EVEX_V512;
11570 let Predicates = [HasAVX512, HasVLX] in {
11571 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11572 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11573 EVEX_4V, EVEX_V128;
11574 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11575 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11576 EVEX_4V, EVEX_V256;
// VFIXUPIMM instantiations: scalar SS/SD (opcode 0x55) and packed PS/PD.
// The f32 variants pair with i32 type info for $src3 and the f64 variants with
// i64 type info; the 64-bit variants add VEX_W. All use SchedWriteFAdd.
11580 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11581 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11582 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11583 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11584 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11585 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11586 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11587 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11588 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11589 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11591 // Patterns used to select SSE scalar fp arithmetic instructions from
11594 // (1) a scalar fp operation followed by a blend
11596 // The effect is that the backend no longer emits unnecessary vector
11597 // insert instructions immediately after SSE scalar fp instructions
11598 // like addss or mulss.
11600 // For example, given the following code:
11601 // __m128 foo(__m128 A, __m128 B) {
11606 // Previously we generated:
11607 // addss %xmm0, %xmm1
11608 // movss %xmm1, %xmm0
11610 // We now generate:
11611 // addss %xmm1, %xmm0
11613 // (2) a vector packed single/double fp operation followed by a vector insert
11615 // The effect is that the backend converts the packed fp instruction
11616 // followed by a vector insert into a single SSE scalar fp instruction.
11618 // For example, given the following code:
11619 // __m128 foo(__m128 A, __m128 B) {
11620 // __m128 C = A + B;
11621 // return (__m128) {C[0], A[1], A[2], A[3]};
11624 // Previously we generated:
11625 // addps %xmm0, %xmm1
11626 // movss %xmm1, %xmm0
11628 // We now generate:
11629 // addss %xmm1, %xmm0
11631 // TODO: Some canonicalization in lowering would simplify the number of
11632 // patterns we have to try to match.
// Patterns that fold "extract element 0, apply Op, re-insert with MoveNode"
// into the EVEX scalar intrinsic-form instructions.
//   Op        - scalar binary operation being matched.
//   OpcPrefix - instruction name stem; "V" is prepended and one of
//               Zrr_Int / Zrm_Int / Zrr_Intk / Zrm_Intk / Zrr_Intkz /
//               Zrm_Intkz is appended.
//   MoveNode  - node performing the low-element insert.
//   _         - vector type info.
//   ZeroFP    - leaf matching the zero used by the zero-masked forms.
// Each of the three groups (unmasked, merge-masked, zero-masked) has a
// register and a scalar-load variant.
11633 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
11634 X86VectorVTInfo _, PatLeaf ZeroFP> {
11635 let Predicates = [HasAVX512] in {
11636 // extracted scalar math op with insert via movss
11637 def : Pat<(MoveNode
11638 (_.VT VR128X:$dst),
11639 (_.VT (scalar_to_vector
11640 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11642 (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
11643 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
11644 def : Pat<(MoveNode
11645 (_.VT VR128X:$dst),
11646 (_.VT (scalar_to_vector
11647 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
11648 (_.ScalarLdFrag addr:$src))))),
11649 (!cast<Instruction>("V"#OpcPrefix#Zrm_Int) _.VT:$dst, addr:$src)>;
11651 // extracted masked scalar math op with insert via movss
11652 def : Pat<(MoveNode (_.VT VR128X:$src1),
11654 (X86selects VK1WM:$mask,
11656 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11659 (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
11660 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11661 VK1WM:$mask, _.VT:$src1,
11662 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11663 def : Pat<(MoveNode (_.VT VR128X:$src1),
11665 (X86selects VK1WM:$mask,
11667 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11668 (_.ScalarLdFrag addr:$src2)),
11670 (!cast<Instruction>("V"#OpcPrefix#Zrm_Intk)
11671 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
11672 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
11674 // extracted zero-masked scalar math op with insert via movss
11675 def : Pat<(MoveNode (_.VT VR128X:$src1),
11677 (X86selects VK1WM:$mask,
11679 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11680 _.FRC:$src2), (_.EltVT ZeroFP)))),
11681 (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
11682 VK1WM:$mask, _.VT:$src1,
11683 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
11684 def : Pat<(MoveNode (_.VT VR128X:$src1),
11686 (X86selects VK1WM:$mask,
11688 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
11689 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
11690 (!cast<I>("V"#OpcPrefix#Zrm_Intkz) VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar math folding patterns for fadd/fsub/fmul/fdiv:
// single precision via X86Movss on v4f32 with fp32imm0, double precision via
// X86Movsd on v2f64 with fp64imm0.
11694 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
11695 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
11696 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
11697 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
11699 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
11700 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
11701 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
11702 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Folds "apply OpNode to element 0 of $src, then insert into $dst via Move"
// into the "V" # OpcPrefix # "Zr_Int" instruction.
//   OpNode    - scalar unary operation being matched.
//   OpcPrefix - instruction name stem.
//   Move      - node performing the low-element insert.
//   _         - vector type info.
11704 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
11705 SDNode Move, X86VectorVTInfo _> {
11706 let Predicates = [HasAVX512] in {
11707 def : Pat<(_.VT (Move _.VT:$dst,
11708 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
11709 (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
// Instantiate the unary folding pattern for fsqrt (single and double precision).
11713 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
11714 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
11716 //===----------------------------------------------------------------------===//
11717 // AES instructions
11718 //===----------------------------------------------------------------------===//
// EVEX-encoded VAES instruction at 128/256/512-bit widths.
//   Op        - opcode byte.
//   OpStr     - mnemonic.
//   IntPrefix - name of the 128-bit intrinsic; "_256" / "_512" are appended
//               to form the wider intrinsic names.
// Z128 / Z256 require HasVLX and HasVAES; Z requires HasAVX512 and HasVAES.
11720 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
11721 let Predicates = [HasVLX, HasVAES] in {
11722 defm Z128 : AESI_binop_rm_int<Op, OpStr,
11723 !cast<Intrinsic>(IntPrefix),
11724 loadv2i64, 0, VR128X, i128mem>,
11725 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
11726 defm Z256 : AESI_binop_rm_int<Op, OpStr,
11727 !cast<Intrinsic>(IntPrefix##"_256"),
11728 loadv4i64, 0, VR256X, i256mem>,
11729 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
11731 let Predicates = [HasAVX512, HasVAES] in
11732 defm Z : AESI_binop_rm_int<Op, OpStr,
11733 !cast<Intrinsic>(IntPrefix##"_512"),
11734 loadv8i64, 0, VR512, i512mem>,
11735 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four VAES instructions (opcodes 0xDC-0xDF), each bound to the matching
// int_x86_aesni_* intrinsic family.
11738 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
11739 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
11740 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
11741 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
11743 //===----------------------------------------------------------------------===//
11744 // PCLMUL instructions - Carry less multiplication
11745 //===----------------------------------------------------------------------===//
// EVEX-encoded VPCLMULQDQ at 512 bits (HasAVX512 + HasVPCLMULQDQ) and at
// 128/256 bits (HasVLX + HasVPCLMULQDQ), each bound to its
// int_x86_pclmulqdq* intrinsic, followed by the assembler aliases for each
// width.
11747 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
11748 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
11749 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
11751 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
11752 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
11753 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
11755 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
11756 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
11757 EVEX_CD8<64, CD8VF>, VEX_WIG;
11761 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
11762 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
11763 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
11765 //===----------------------------------------------------------------------===//
11767 //===----------------------------------------------------------------------===//
// Three-source variable shift forms without broadcast: r (register $src3) and
// m (full memory $src3). $src1 is tied to $dst.
//   Op     - opcode byte.
//   OpStr  - mnemonic.
//   OpNode - node selected by both forms.
//   sched  - scheduling class (Folded / ReadAfterFold for the memory form).
//   VTI    - vector type info.
11769 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
11770 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11771 let Constraints = "$src1 = $dst",
11772 ExeDomain = VTI.ExeDomain in {
11773 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11774 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11775 "$src3, $src2", "$src2, $src3",
11776 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
11777 AVX512FMA3Base, Sched<[sched]>;
11778 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11779 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11780 "$src3, $src2", "$src2, $src3",
11781 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11782 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11784 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends VBMI2_shift_var_rm (same parameters) with the mb form, whose $src3
// is a broadcast scalar memory operand (VTI.BroadcastLdFrag, EVEX_B).
11788 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11789 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
11790 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
11791 let Constraints = "$src1 = $dst",
11792 ExeDomain = VTI.ExeDomain in
11793 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11794 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
11795 "${src3}"##VTI.BroadcastStr##", $src2",
11796 "$src2, ${src3}"##VTI.BroadcastStr,
11797 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11798 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11799 AVX512FMA3Base, EVEX_B,
11800 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VBMI2_shift_var_rm (no broadcast form) at each vector width:
// Z under HasVBMI2, Z256 / Z128 under HasVBMI2 + HasVLX.
11803 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
11804 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11805 let Predicates = [HasVBMI2] in
11806 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11808 let Predicates = [HasVBMI2, HasVLX] in {
11809 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11811 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Instantiates VBMI2_shift_var_rmb (including the broadcast form) at each
// vector width: Z under HasVBMI2, Z256 / Z128 under HasVBMI2 + HasVLX.
11816 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
11817 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11818 let Predicates = [HasVBMI2] in
11819 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
11821 let Predicates = [HasVBMI2, HasVLX] in {
11822 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
11824 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Word / dword / qword variants of a variable-count VBMI2 shift.
//   wOp    - opcode for the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w" / "d" / "q" is appended.
// W uses the non-broadcast multiclass; D and Q use the broadcast-capable one.
// W and Q add VEX_W.
11828 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
11829 SDNode OpNode, X86SchedWriteWidths sched> {
11830 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
11831 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
11832 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
11833 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11834 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
11835 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Word / dword / qword variants of an immediate-count VBMI2 shift.
//   wOp    - opcode for the word (W) variant.
//   dqOp   - opcode shared by the dword (D) and qword (Q) variants.
//   Prefix - mnemonic stem; "w" / "d" / "q" is appended.
// W is built from avx512_common_3Op_rm_imm8; D and Q from
// avx512_common_3Op_imm8. All are guarded by HasVBMI2.
11838 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
11839 SDNode OpNode, X86SchedWriteWidths sched> {
11840 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
11841 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
11842 VEX_W, EVEX_CD8<16, CD8VF>;
11843 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
11844 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11845 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
11846 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Concatenate-and-shift instructions: variable-count (VPSHLDV / VPSHRDV) and
// immediate-count (VPSHLD / VPSHRD). All are scheduled as SchedWriteVecIMul.
11850 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
11851 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
11852 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
11853 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// Byte / word compress (opcode 0x63) and expand (opcode 0x62) instructions,
// guarded by HasVBMI2 and scheduled as WriteVarShuffle256. The word variants
// add VEX_W.
11856 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
11857 avx512vl_i8_info, HasVBMI2>, EVEX,
11859 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
11860 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
11863 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
11864 avx512vl_i8_info, HasVBMI2>, EVEX;
11865 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
11866 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
11868 //===----------------------------------------------------------------------===//
11870 //===----------------------------------------------------------------------===//
// VNNI three-source forms with $src1 tied to $dst: r (register $src3),
// m (full memory $src3) and mb (broadcast scalar memory $src3, EVEX_B).
//   Op           - opcode byte.
//   OpStr        - mnemonic.
//   OpNode       - node selected by all three forms.
//   sched        - scheduling class (Folded / ReadAfterFold for memory forms).
//   VTI          - vector type info.
//   IsCommutable - forwarded (twice) to AVX512_maskable_3src for the register
//                  form only.
11872 let Constraints = "$src1 = $dst" in
11873 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
11874 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
11875 bit IsCommutable> {
11876 let ExeDomain = VTI.ExeDomain in {
11877 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
11878 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
11879 "$src3, $src2", "$src2, $src3",
11880 (VTI.VT (OpNode VTI.RC:$src1,
11881 VTI.RC:$src2, VTI.RC:$src3)),
11882 IsCommutable, IsCommutable>,
11883 EVEX_4V, T8PD, Sched<[sched]>;
11884 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11885 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
11886 "$src3, $src2", "$src2, $src3",
11887 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
11888 (VTI.VT (VTI.LdFrag addr:$src3))))>,
11889 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
11890 Sched<[sched.Folded, sched.ReadAfterFold]>;
11891 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
11892 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
11893 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
11894 "$src2, ${src3}"##VTI.BroadcastStr,
11895 (OpNode VTI.RC:$src1, VTI.RC:$src2,
11896 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
11897 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
11898 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VNNI_rmb on i32 vectors at each width: Z (v16i32) under
// HasVNNI, Z256 (v8i32) / Z128 (v4i32) under HasVNNI + HasVLX.
11902 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
11903 X86SchedWriteWidths sched, bit IsCommutable> {
11904 let Predicates = [HasVNNI] in
11905 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
11906 IsCommutable>, EVEX_V512;
11907 let Predicates = [HasVNNI, HasVLX] in {
11908 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
11909 IsCommutable>, EVEX_V256;
11910 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
11911 IsCommutable>, EVEX_V128;
11915 // FIXME: Is there a better scheduler class for VPDP?
// The VPDPBUSD[S] forms pass IsCommutable = 0; the VPDPWSSD[S] forms pass 1.
11916 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
11917 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
11918 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
11919 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
// X86vpmaddwd restricted to nodes that have exactly one use.
11921 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
11922 (X86vpmaddwd node:$lhs, node:$rhs), [{
11923 return N->hasOneUse();
11926 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
// An add whose second operand is a single-use X86vpmaddwd is selected as
// VPDPWSSD, with the add's first operand as the accumulator ($src1). Register
// and load variants are provided for each width.
11927 let Predicates = [HasVNNI] in {
11928 def : Pat<(v16i32 (add VR512:$src1,
11929 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
11930 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
11931 def : Pat<(v16i32 (add VR512:$src1,
11932 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
11933 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
// 256-bit and 128-bit variants additionally require VLX.
11935 let Predicates = [HasVNNI,HasVLX] in {
11936 def : Pat<(v8i32 (add VR256X:$src1,
11937 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
11938 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
11939 def : Pat<(v8i32 (add VR256X:$src1,
11940 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
11941 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
11942 def : Pat<(v4i32 (add VR128X:$src1,
11943 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
11944 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
11945 def : Pat<(v4i32 (add VR128X:$src1,
11946 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
11947 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
11950 //===----------------------------------------------------------------------===//
11952 //===----------------------------------------------------------------------===//
11954 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// Byte and word population count (opcode 0x54), selected from ctpop and
// guarded by HasBITALG; the word variant adds VEX_W.
11955 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
11956 avx512vl_i8_info, HasBITALG>;
11957 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
11958 avx512vl_i16_info, HasBITALG>, VEX_W;
11960 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
11961 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// X86Vpshufbitqmb restricted to nodes that have exactly one use.
11963 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
11964 (X86Vpshufbitqmb node:$src1, node:$src2), [{
11965 return N->hasOneUse();
// VPSHUFBITQMB (opcode 0x8F) register (rr) and memory (rm) forms; the result
// is written to a mask register (VTI.KRC). Each form passes two patterns to
// AVX512_maskable_cmp: one using X86Vpshufbitqmb and one using the single-use
// fragment X86Vpshufbitqmb_su.
//   sched - scheduling class (Folded / ReadAfterFold for the memory form).
//   VTI   - vector type info.
11968 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
11969 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
11970 (ins VTI.RC:$src1, VTI.RC:$src2),
11972 "$src2, $src1", "$src1, $src2",
11973 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11974 (VTI.VT VTI.RC:$src2)),
11975 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11976 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
11978 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
11979 (ins VTI.RC:$src1, VTI.MemOp:$src2),
11981 "$src2, $src1", "$src1, $src2",
11982 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
11983 (VTI.VT (VTI.LdFrag addr:$src2))),
11984 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
11985 (VTI.VT (VTI.LdFrag addr:$src2)))>,
11986 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
11987 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VPSHUFBITQMB_rm at each width: Z under HasBITALG, Z256 / Z128
// under HasBITALG + HasVLX.
11990 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
11991 let Predicates = [HasBITALG] in
11992 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
11993 let Predicates = [HasBITALG, HasVLX] in {
11994 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
11995 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
11999 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Instantiated on byte vectors with SchedWriteVecIMul.
12000 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12002 //===----------------------------------------------------------------------===//
12004 //===----------------------------------------------------------------------===//
// EVEX GF2P8MULB on byte vectors at each width, built from avx512_binop_rm
// with its last template argument set to 1. Z requires HasGFNI + HasAVX512 +
// HasBWI; Z256 / Z128 require HasGFNI + HasVLX + HasBWI.
12006 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12007 X86SchedWriteWidths sched> {
12008 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12009 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12011 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12012 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12014 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
// VGF2P8MULB: opcode 0xCF, selected from X86GF2P8mulb.
12019 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12021 EVEX_CD8<8, CD8VF>, T8PD;
// Extends avx512_3Op_rm_imm8 with the rmbi form, whose $src2 is a broadcast
// scalar memory operand (EVEX_B).
//   VTI     - type info for the data operand and result.
//   BcstVTI - type info for the broadcast operand; it is loaded with
//             X86VBroadcastld64 as BcstVTI.VT and bitconverted for OpNode.
//             The printed broadcast suffix also comes from BcstVTI.
12023 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12024 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12025 X86VectorVTInfo BcstVTI>
12026 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12027 let ExeDomain = VTI.ExeDomain in
12028 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12029 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12030 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
12031 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
12032 (OpNode (VTI.VT VTI.RC:$src1),
12033 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12034 (i8 timm:$src3))>, EVEX_B,
12035 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates GF2P8AFFINE_avx512_rmb_imm at each width, pairing the byte
// vector type with the i64 vector type of the same width for the broadcast
// operand. Z requires HasGFNI + HasAVX512 + HasBWI; Z256 / Z128 require
// HasGFNI + HasVLX + HasBWI.
12038 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12039 X86SchedWriteWidths sched> {
12040 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12041 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12042 v64i8_info, v8i64_info>, EVEX_V512;
12043 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12044 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12045 v32i8x_info, v4i64x_info>, EVEX_V256;
12046 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12047 v16i8x_info, v2i64x_info>, EVEX_V128;
// GF(2^8) affine transform instructions: VGF2P8AFFINEINVQB (opcode 0xCF) and
// VGF2P8AFFINEQB (opcode 0xCE), both scheduled as SchedWriteVecIMul.
12051 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12052 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12053 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12054 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12055 X86GF2P8affineqb, SchedWriteVecIMul>,
12056 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12059 //===----------------------------------------------------------------------===//
12061 //===----------------------------------------------------------------------===//
// Assembler-only definitions (empty pattern lists) for the packed and scalar
// V4FMADD / V4FNMADD instructions. Each takes a register $src2 and an f128mem
// $src3, with $src1 tied to $dst; all are marked mayLoad, read MXCSR and are
// marked mayRaiseFPException.
12063 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12064 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12065 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12066 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12067 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12068 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12069 Sched<[SchedWriteFMA.ZMM.Folded]>;
12071 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12072 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12073 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12074 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12075 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Scalar variants operate on VR128X and use CD8VF rather than CD8VQ.
12077 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12078 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12079 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12080 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12081 Sched<[SchedWriteFMA.Scl.Folded]>;
12083 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12084 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12085 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12086 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12087 Sched<[SchedWriteFMA.Scl.Folded]>;
12090 //===----------------------------------------------------------------------===//
12092 //===----------------------------------------------------------------------===//
// Assembler-only definitions (empty pattern lists) for VP4DPWSSD / VP4DPWSSDS.
// Each takes a register $src2 and a 128-bit memory $src3, with $src1 tied to
// $dst.
// NOTE(review): these integer-domain instructions use the f128mem operand type
// and SchedWriteFMA scheduling - confirm this is intentional.
12094 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12095 Constraints = "$src1 = $dst" in {
12096 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12097 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12098 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12099 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12100 Sched<[SchedWriteFMA.ZMM.Folded]>;
12102 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12103 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12104 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12105 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12106 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo instructions (no selection patterns) that store / load a VK16PAIR
// mask-register pair to / from memory.
12109 let hasSideEffects = 0 in {
12110 let mayStore = 1, SchedRW = [WriteFStoreX] in
12111 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12112 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12113 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12116 //===----------------------------------------------------------------------===//
12118 //===----------------------------------------------------------------------===//
// VP2INTERSECT (opcode 0x68) forms for one vector width: rr (register),
// rm (full memory $src2) and rmb (broadcast scalar memory $src2, EVEX_B).
// The result is written to a mask register pair (_.KRPC) and all forms are
// selected from X86vp2intersect.
//   sched - scheduling class (Folded / ReadAfterFold for memory forms).
//   _     - vector type info; _.Suffix is appended to the mnemonic.
12120 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12121 def rr : I<0x68, MRMSrcReg,
12122 (outs _.KRPC:$dst),
12123 (ins _.RC:$src1, _.RC:$src2),
12124 !strconcat("vp2intersect", _.Suffix,
12125 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12126 [(set _.KRPC:$dst, (X86vp2intersect
12127 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12128 EVEX_4V, T8XD, Sched<[sched]>;
12130 def rm : I<0x68, MRMSrcMem,
12131 (outs _.KRPC:$dst),
12132 (ins _.RC:$src1, _.MemOp:$src2),
12133 !strconcat("vp2intersect", _.Suffix,
12134 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12135 [(set _.KRPC:$dst, (X86vp2intersect
12136 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12137 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12138 Sched<[sched.Folded, sched.ReadAfterFold]>;
12140 def rmb : I<0x68, MRMSrcMem,
12141 (outs _.KRPC:$dst),
12142 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12143 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12144 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12145 [(set _.KRPC:$dst, (X86vp2intersect
12146 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12147 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12148 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_vp2intersect_modes for the 512-, 256- and 128-bit
// members of the vector type family `_`. The Z (512-bit) form requires
// HasAVX512 and HasVP2INTERSECT; the Z256/Z128 forms additionally require
// HasVLX. `sched` provides the per-width scheduling classes.
12151 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12152 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12153 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12155 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12156 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12157 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
// i32-element and i64-element instantiations of vp2intersect, both scheduled
// as SchedWriteVecALU. The i64 variant additionally sets VEX_W.
12161 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12162 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
// Instantiates avx512_binop_rm2 at the Z, Z256 and Z128 widths for a binary
// operation whose source and destination vector type families differ.
//   opc, OpcodeStr - opcode byte and mnemonic forwarded to avx512_binop_rm2.
//   sched          - per-width scheduling classes (ZMM/YMM/XMM).
//   _SrcVTInfo     - passed as both the first and the third type-info
//                    argument of avx512_binop_rm2.
//   _DstVTInfo     - passed as the second type-info argument.
//   OpNode         - selection node forwarded to avx512_binop_rm2.
//   prd            - predicate required by all widths; Z256/Z128 also
//                    require HasVLX.
//   IsCommutable   - forwarded unchanged (defaults to 0).
// Note that the compressed-displacement element size is fixed at 32 bits
// (EVEX_CD8<32, CD8VF>) for every width, independent of the type infos.
12164 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12165 X86SchedWriteWidths sched,
12166 AVX512VLVectorVTInfo _SrcVTInfo,
12167 AVX512VLVectorVTInfo _DstVTInfo,
12168 SDNode OpNode, Predicate prd,
12169 bit IsCommutable = 0> {
12170 let Predicates = [prd] in
12171 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12172 _SrcVTInfo.info512, _DstVTInfo.info512,
12173 _SrcVTInfo.info512, IsCommutable>,
12174 EVEX_V512, EVEX_CD8<32, CD8VF>;
12175 let Predicates = [HasVLX, prd] in {
12176 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12177 _SrcVTInfo.info256, _DstVTInfo.info256,
12178 _SrcVTInfo.info256, IsCommutable>,
12179 EVEX_V256, EVEX_CD8<32, CD8VF>;
12180 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12181 _SrcVTInfo.info128, _DstVTInfo.info128,
12182 _SrcVTInfo.info128, IsCommutable>,
12183 EVEX_V128, EVEX_CD8<32, CD8VF>;
// "vcvtne2ps2bf16" (opcode 0x72): f32 source type family, i16 destination
// type family, selected via X86cvtne2ps2bf16, gated on HasBF16 and declared
// non-commutable. It currently reuses the SchedWriteCvtPD2PS scheduling
// classes (see the FIXME below).
12187 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12188 SchedWriteCvtPD2PS, // FIXME: Should be SchedWriteCvtPS2BF
12189 avx512vl_f32_info, avx512vl_i16_info,
12190 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12192 // Truncate Float to BFloat16.
// Builds the Z, Z256 and Z128 forms through avx512_vcvt_fp. All forms require
// HasBF16; the 128/256-bit forms also require HasVLX. The definitions are
// wrapped in `let Uses = []<Register>, mayRaiseFPException = 0`.
// The Z128 form is given null_frag instead of a selection node, so it has no
// built-in pattern; it also passes explicit "{1to4}" / "{x}" strings and
// f128mem, while Z256 passes "{1to8}" / "{y}".
// The InstAlias entries add "x"- and "y"-suffixed spellings of the mnemonic
// that map to the Z128 and Z256 rr/rm instructions; note that $dst is VR128X
// in all of them, including the Z256 ones.
12193 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12194 X86SchedWriteWidths sched> {
12195 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12196 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12197 X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12199 let Predicates = [HasBF16, HasVLX] in {
12200 let Uses = []<Register>, mayRaiseFPException = 0 in {
12201 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12202 null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12204 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12206 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12209 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12210 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12212 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
12213 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12214 f128mem:$src), 0, "intel">;
12215 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12216 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12218 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
12219 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12220 f256mem:$src), 0, "intel">;
// "vcvtneps2bf16" (opcode 0x72) instantiated through avx512_cvtps2bf16 with
// T8XS and EVEX_CD8<32, CD8VF>. It uses the SchedWriteCvtPD2PS scheduling
// classes, the same ones VCVTNE2PS2BF16 uses.
12224 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12225 SchedWriteCvtPD2PS>, T8XS,
12226 EVEX_CD8<32, CD8VF>;
// Hand-written selection patterns for the 128-bit vcvtneps2bf16 forms,
// required when both HasBF16 and HasVLX are available. For each source kind
// there are three patterns: unmasked (X86cvtneps2bf16), merge-masked
// (X86mcvtneps2bf16 with a $src0 pass-through, selecting the *k instruction)
// and zero-masked (X86mcvtneps2bf16 with ImmAllZerosV, selecting the *kz
// instruction). All masked forms use a VK4WM mask.
12228 let Predicates = [HasBF16, HasVLX] in {
12229 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12230 // patterns have been disabled with null_frag.
// Source in a vector register.
12231 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12232 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12233 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12235 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12236 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12238 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
// Source is a v4f32 load (loadv4f32).
12240 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12241 (VCVTNEPS2BF16Z128rm addr:$src)>;
12242 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12244 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12245 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12247 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
// Source is a broadcast load (X86VBroadcastld32) typed as v4f32.
12249 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12250 (X86VBroadcastld32 addr:$src)))),
12251 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12252 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12253 (v8i16 VR128X:$src0), VK4WM:$mask),
12254 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12255 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12256 v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12257 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// Three-source maskable forms for one vector width, with $src1 tied to $dst
// by the enclosing Constraints.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   OpNode         - selection node applied to ($src1, $src2, $src3).
//   sched          - scheduling info; memory forms use sched.Folded and
//                    sched.ReadAfterFold.
//   _              - type info for $dst and the register operands; it also
//                    provides MemOp / ScalarMemOp / BroadcastStr.
//   src_v          - type info used to type the memory third operand
//                    (src_v.VT, src_v.LdFrag, src_v.BroadcastLdFrag).
// Forms defined: r (register $src3), m (full-vector load bitconverted to
// src_v.VT), mb (broadcast load, EVEX_B).
12260 let Constraints = "$src1 = $dst" in {
12261 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12262 X86FoldableSchedWrite sched,
12263 X86VectorVTInfo _, X86VectorVTInfo src_v> {
// All-register form.
12264 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12265 (ins _.RC:$src2, _.RC:$src3),
12266 OpcodeStr, "$src3, $src2", "$src2, $src3",
12267 (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
12268 EVEX_4V, Sched<[sched]>;
// Third operand loaded from memory as a full vector.
12270 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12271 (ins _.RC:$src2, _.MemOp:$src3),
12272 OpcodeStr, "$src3, $src2", "$src2, $src3",
12273 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12274 (src_v.VT (bitconvert
12275 (src_v.LdFrag addr:$src3)))))>, EVEX_4V,
12276 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Third operand is a broadcast load; _.BroadcastStr is appended to $src3 in
// both assembly operand strings.
12278 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12279 (ins _.RC:$src2, _.ScalarMemOp:$src3),
12281 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12282 !strconcat("$src2, ${src3}", _.BroadcastStr),
12283 (_.VT (OpNode _.RC:$src1, _.RC:$src2,
12284 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12285 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12288 } // Constraints = "$src1 = $dst"
// Instantiates avx512_dpbf16ps_rm for the 512-, 256- and 128-bit members of
// the `_` and `src_v` type families. The Z form requires `prd`; the Z256 and
// Z128 forms require HasVLX in addition to `prd`. `sched` supplies the
// per-width scheduling classes (ZMM/YMM/XMM).
12290 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12291 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12292 AVX512VLVectorVTInfo src_v, Predicate prd> {
12293 let Predicates = [prd] in {
12294 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12295 src_v.info512>, EVEX_V512;
12297 let Predicates = [HasVLX, prd] in {
12298 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12299 src_v.info256>, EVEX_V256;
12300 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12301 src_v.info128>, EVEX_V128;
// "vdpbf16ps" (opcode 0x52): f32 type family for the destination/register
// operands, i32 type family for typing the memory operand, selected via
// X86dpbf16ps, scheduled as SchedWriteFMA and gated on HasBF16.
12305 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12306 avx512vl_f32_info, avx512vl_i32_info,
12307 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;