1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT). These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// The register class holding a value of this type (VR128X/VR256X/VR512,
// or a GPR class for the scalar-integer instantiations below).
23   RegisterClass RC = rc;
24   ValueType EltVT = eltvt;
25   int NumElts = numelts;
27   // Corresponding mask register class.
28   RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
30   // Corresponding write-mask register class.
31   RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// Mask value type with one i1 per vector element, e.g. v16i1 for 16 elements.
34   ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
36   // Suffix used in the instruction mnemonic.
37   string Suffix = suffix;
39   // VTName is a string name for vector VT. For vector types it will be
40   // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41   // It is a little bit complex for scalar types, where NumElts = 1.
42   // In this case we build v4f32 or v2f64
43   string VTName = "v" # !if (!eq (NumElts, 1),
44                          !if (!eq (EltVT.Size, 32), 4,
45                          !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The ValueType record looked up from VTName, e.g. v16i32.
48   ValueType VT = !cast<ValueType>(VTName);
50   string EltTypeName = !cast<string>(EltVT);
51   // Size of the element type in bits, e.g. 32 for v16i32.
52   string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53   int EltSize = EltVT.Size;
55   // "i" for integer types and "f" for floating-point types
56   string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
58   // Size of RC in bits, e.g. 512 for VR512.
61   // The corresponding memory operand, e.g. i512mem for VR512.
62   X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
// Memory operand sized for a single element, e.g. f32mem.
63   X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64   // FP scalar memory operand for intrinsics - ssmem/sdmem.
65   Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66                           !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load pattern fragments: full-vector load, aligned full-vector load, and
// single-element (scalar) load.
69   PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
71   PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
73   PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// ComplexPattern matching a scalar FP load for intrinsic patterns
// (sse_load_f32 / sse_load_f64); only defined for f32/f64 elements.
75   ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
76                                          !cast<ComplexPattern>("sse_load_f32"),
77                                         !if (!eq (EltTypeName, "f64"),
78                                              !cast<ComplexPattern>("sse_load_f64"),
81   // The string to specify embedded broadcast in assembly.
82   string BroadcastStr = "{1to" # NumElts # "}";
84   // 8-bit compressed displacement tuple/subvector format. This is only
85   // defined for NumElts <= 8.
86   CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
87                                !cast<CD8VForm>("CD8VT" # NumElts), ?);
// Subregister index for the low 128/256 bits of a wider register;
// undefined for other sizes.
89   SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
90                                !if (!eq (Size, 256), sub_ymm, ?));
// Execution domain: f32 -> SSEPackedSingle, f64 -> SSEPackedDouble.
92   Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
93                      !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// Scalar FP register class matching the element type (FR32X or FR64X).
96   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
98   // A vector type of the same width with element type i32. This is used to
99   // create the canonical constant zero node ImmAllZerosV.
100   ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
101   dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
// Instruction-name suffix encoding vector length: Z128/Z256 for the
// EVEX VL variants, plain Z for 512-bit.
103   string ZSuffix = !if (!eq (Size, 128), "Z128",
104                        !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector type descriptors (RC = VR512).
107 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
108 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
109 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
110 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
111 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
112 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
114 // "x" in v32i8x_info means RC = VR256X
115 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
116 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
117 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
118 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
119 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
120 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector type descriptors (RC = VR128X).
122 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
123 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
124 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
125 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
126 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
127 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
129 // We map scalar types to the smallest (128-bit) vector type
130 // with the appropriate element type. This allows to use the same masking logic.
131 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
132 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
133 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
134 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512/256/128-bit X86VectorVTInfo descriptors for one element
// type, so VL (vector-length) multiclasses can instantiate all three sizes
// from a single argument.
136 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
137 X86VectorVTInfo i128> {
138 X86VectorVTInfo info512 = i512;
139 X86VectorVTInfo info256 = i256;
140 X86VectorVTInfo info128 = i128;
// One bundle per element type: i8/i16/i32/i64/f32/f64.
143 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
145 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
147 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
149 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
151 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
153 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Descriptor for mask-register (k-register) vector types: the plain mask
// register class and its write-mask variant.
156 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
158 RegisterClass KRC = _krc;
159 RegisterClass KRCWM = _krcwm;
// One descriptor per mask width, v1i1 through v64i1.
163 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
164 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
165 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
166 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
167 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
168 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
169 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
171 // This multiclass generates the masking variants from the non-masking
172 // variant. It only provides the assembly pieces for the masking variants.
173 // It assumes custom ISel patterns for masking which can be provided as
174 // template arguments.
175 multiclass AVX512_maskable_custom<bits<8> O, Format F,
177 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
179 string AttSrcAsm, string IntelSrcAsm,
181 list<dag> MaskingPattern,
182 list<dag> ZeroMaskingPattern,
183 string MaskingConstraint = "",
184 bit IsCommutable = 0,
185 bit IsKCommutable = 0,
186 bit IsKZCommutable = IsCommutable> {
// Unmasked variant: plain destination, no mask operand.
187 let isCommutable = IsCommutable in
188 def NAME: AVX512<O, F, Outs, Ins,
189 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
190 "$dst, "#IntelSrcAsm#"}",
// Merge-masking variant ("k" suffix): asm carries "$dst {${mask}}".
193 // Prefer over VMOV*rrk Pat<>
194 let isCommutable = IsKCommutable in
195 def NAME#k: AVX512<O, F, Outs, MaskingIns,
196 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
197 "$dst {${mask}}, "#IntelSrcAsm#"}",
200 // In case of the 3src subclass this is overridden with a let.
201 string Constraints = MaskingConstraint;
// Zero-masking variant ("kz" suffix): asm carries "$dst {${mask}} {z}".
204 // Zero mask does not add any restrictions to commute operands transformation.
205 // So, it is Ok to use IsCommutable instead of IsKCommutable.
206 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
207 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
208 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
209 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
215 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three AVX512_maskable_custom variants from a plain RHS dag and
// a masking RHS dag; the zero-masking pattern is derived here by selecting
// between RHS and the all-zeros vector with the Select node.
216 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
218 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
220 string AttSrcAsm, string IntelSrcAsm,
221 dag RHS, dag MaskingRHS,
222 SDNode Select = vselect,
223 string MaskingConstraint = "",
224 bit IsCommutable = 0,
225 bit IsKCommutable = 0,
226 bit IsKZCommutable = IsCommutable> :
227 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
228 AttSrcAsm, IntelSrcAsm,
229 [(set _.RC:$dst, RHS)],
230 [(set _.RC:$dst, MaskingRHS)],
232 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
233 MaskingConstraint, IsCommutable,
234 IsKCommutable, IsKZCommutable>;
236 // This multiclass generates the unconditional/non-masking, the masking and
237 // the zero-masking variant of the vector instruction. In the masking case, the
238 // preserved vector elements come from a new dummy input operand tied to $dst.
239 // This version uses a separate dag for non-masking and masking.
// Takes separate RHS dags for the unmasked (RHS) and masked (MaskRHS)
// patterns; masked variants select between MaskRHS and the passthru
// ($src0) or the all-zeros vector.
240 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
241 dag Outs, dag Ins, string OpcodeStr,
242 string AttSrcAsm, string IntelSrcAsm,
243 dag RHS, dag MaskRHS,
244 bit IsCommutable = 0, bit IsKCommutable = 0,
245 SDNode Select = vselect> :
246 AVX512_maskable_custom<O, F, Outs, Ins,
// Merge-masking ins prepend the tied passthru $src0 and the $mask operand.
247 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
248 !con((ins _.KRCWM:$mask), Ins),
249 OpcodeStr, AttSrcAsm, IntelSrcAsm,
250 [(set _.RC:$dst, RHS)],
252 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
254 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
255 "$src0 = $dst", IsCommutable, IsKCommutable>;
257 // This multiclass generates the unconditional/non-masking, the masking and
258 // the zero-masking variant of the vector instruction. In the masking case, the
259 // preserved vector elements come from a new dummy input operand tied to $dst.
260 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
261 dag Outs, dag Ins, string OpcodeStr,
262 string AttSrcAsm, string IntelSrcAsm,
264 bit IsCommutable = 0, bit IsKCommutable = 0,
265 bit IsKZCommutable = IsCommutable,
266 SDNode Select = vselect> :
267 AVX512_maskable_common<O, F, _, Outs, Ins,
// Merge-masking ins prepend the tied passthru $src0 and the $mask operand.
268 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
269 !con((ins _.KRCWM:$mask), Ins),
270 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
// Masked result: keep RHS where $mask is set, passthru $src0 elsewhere.
271 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
272 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
275 // This multiclass generates the unconditional/non-masking, the masking and
276 // the zero-masking variant of the scalar instruction.
// Scalar wrapper around AVX512_maskable: uses X86selects (scalar select)
// instead of vselect, and forwards IsCommutable as the kz-commutable bit.
277 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
278 dag Outs, dag Ins, string OpcodeStr,
279 string AttSrcAsm, string IntelSrcAsm,
281 bit IsCommutable = 0> :
282 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
283 RHS, IsCommutable, 0, IsCommutable, X86selects>;
285 // Similar to AVX512_maskable but in this case one of the source operands
286 // ($src1) is already tied to $dst so we just use that for the preserved
287 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
289 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
290 dag Outs, dag NonTiedIns, string OpcodeStr,
291 string AttSrcAsm, string IntelSrcAsm,
293 bit IsCommutable = 0,
294 bit IsKCommutable = 0,
295 SDNode Select = vselect,
297 AVX512_maskable_common<O, F, _, Outs,
// $src1 is tied to $dst and doubles as the merge-masking passthru, so all
// three ins lists prepend it to NonTiedIns.
298 !con((ins _.RC:$src1), NonTiedIns),
299 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
300 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
301 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// MaskOnly suppresses the unmasked pattern via null_frag.
302 !if(MaskOnly, (null_frag), RHS),
303 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
304 Select, "", IsCommutable, IsKCommutable>;
306 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
307 // operand differs from the output VT. This requires a bitconvert on
308 // the preserved vector going into the vselect.
309 // NOTE: The unmasked pattern is disabled.
310 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
311 X86VectorVTInfo InVT,
312 dag Outs, dag NonTiedIns, string OpcodeStr,
313 string AttSrcAsm, string IntelSrcAsm,
314 dag RHS, bit IsCommutable = 0> :
315 AVX512_maskable_common<O, F, OutVT, Outs,
// The tied operand uses the *input* VT's register/mask classes.
316 !con((ins InVT.RC:$src1), NonTiedIns),
317 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
318 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// Unmasked pattern disabled (null_frag); see the NOTE above.
319 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
// The passthru must be bitconverted from InVT to the output type.
320 (vselect InVT.KRCWM:$mask, RHS,
321 (bitconvert InVT.RC:$src1)),
322 vselect, "", IsCommutable>;
// Scalar form of AVX512_maskable_3src: selects with X86selects instead of
// vselect and forwards the MaskOnly flag.
324 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
325 dag Outs, dag NonTiedIns, string OpcodeStr,
326 string AttSrcAsm, string IntelSrcAsm,
328 bit IsCommutable = 0,
329 bit IsKCommutable = 0,
331 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
332 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
333 X86selects, MaskOnly>;
// Assembly-only masking variants: emits the unmasked pattern as given and
// leaves the k/kz patterns empty (selection handled elsewhere).
335 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
338 string AttSrcAsm, string IntelSrcAsm,
340 AVX512_maskable_custom<O, F, Outs, Ins,
341 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
342 !con((ins _.KRCWM:$mask), Ins),
343 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// 3-src assembly-only variant: $src1 is tied into every ins list; masking
// patterns are left empty.
346 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
347 dag Outs, dag NonTiedIns,
349 string AttSrcAsm, string IntelSrcAsm,
351 AVX512_maskable_custom<O, F, Outs,
352 !con((ins _.RC:$src1), NonTiedIns),
353 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
354 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
355 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
358 // Instruction with mask that puts result in mask register,
359 // like "compare" and "vptest"
360 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
362 dag Ins, dag MaskingIns,
364 string AttSrcAsm, string IntelSrcAsm,
366 list<dag> MaskingPattern,
367 bit IsCommutable = 0> {
// Unmasked compare/test: result goes straight to the k-register $dst.
368 let isCommutable = IsCommutable in
369 def NAME: AVX512<O, F, Outs, Ins,
370 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
371 "$dst, "#IntelSrcAsm#"}",
// Masked variant ("k" suffix), encoded with EVEX_K.
374 def NAME#k: AVX512<O, F, Outs, MaskingIns,
375 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
376 "$dst {${mask}}, "#IntelSrcAsm#"}",
377 MaskingPattern>, EVEX_K;
// Wraps AVX512_maskable_custom_cmp, setting the k-register destination
// (_.KRC:$dst) from the given RHS / MaskingRHS dags.
380 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
382 dag Ins, dag MaskingIns,
384 string AttSrcAsm, string IntelSrcAsm,
385 dag RHS, dag MaskingRHS,
386 bit IsCommutable = 0> :
387 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
388 AttSrcAsm, IntelSrcAsm,
389 [(set _.KRC:$dst, RHS)],
390 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Standard masked compare: the masked result is the unmasked result ANDed
// with the incoming mask.
392 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
393 dag Outs, dag Ins, string OpcodeStr,
394 string AttSrcAsm, string IntelSrcAsm,
395 dag RHS, bit IsCommutable = 0> :
396 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
397 !con((ins _.KRCWM:$mask), Ins),
398 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
399 (and _.KRCWM:$mask, RHS), IsCommutable>;
// Assembly-only compare variant: both selection pattern lists are empty.
401 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
402 dag Outs, dag Ins, string OpcodeStr,
403 string AttSrcAsm, string IntelSrcAsm> :
404 AVX512_maskable_custom_cmp<O, F, Outs,
405 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
406 AttSrcAsm, IntelSrcAsm, [], []>;
408 // This multiclass generates the unconditional/non-masking, the masking and
409 // the zero-masking variant of the vector instruction. In the masking case, the
410 // preserved vector elements come from a new dummy input operand tied to $dst.
411 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
412 dag Outs, dag Ins, string OpcodeStr,
413 string AttSrcAsm, string IntelSrcAsm,
414 dag RHS, dag MaskedRHS,
415 bit IsCommutable = 0, SDNode Select = vselect> :
416 AVX512_maskable_custom<O, F, Outs, Ins,
// Merge-masking ins prepend the tied passthru $src0 and the $mask operand.
417 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
418 !con((ins _.KRCWM:$mask), Ins),
419 OpcodeStr, AttSrcAsm, IntelSrcAsm,
420 [(set _.RC:$dst, RHS)],
// Masked patterns use the separate MaskedRHS dag.
422 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
424 (Select _.KRCWM:$mask, MaskedRHS,
426 "$src0 = $dst", IsCommutable>;
429 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
430 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
431 // swizzled by ExecutionDomainFix to pxor.
432 // We set canFoldAsLoad because this can be converted to a constant-pool
433 // load of an all-zeros value if folding it would be beneficial.
// 512-bit all-zeros / all-ones pseudos; rematerializable and foldable as a
// constant-pool load (see the comment block above).
434 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
435 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
436 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
437 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
438 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
439 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
442 // Alias instructions that allow VPTERNLOG to be used with a mask to create
443 // a mix of all ones and all zeros elements. This is done this way to force
444 // the same register to be used as input for all three sources.
// Mask sign-extension pseudos: select all-ones or all-zeros per element
// from a k-register mask (32-bit and 64-bit element variants).
445 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
446 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
447 (ins VK16WM:$mask), "",
448 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
449 (v16i32 immAllOnesV),
450 (v16i32 immAllZerosV)))]>;
451 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
452 (ins VK8WM:$mask), "",
// The 64-bit variant bitcasts the v16i32 constants to v8i64.
453 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
454 (bc_v8i64 (v16i32 immAllOnesV)),
455 (bc_v8i64 (v16i32 immAllZerosV))))]>;
// 128-bit and 256-bit all-zeros pseudos (EVEX register classes).
458 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
459 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
460 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
461 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
462 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
463 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
466 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
467 // This is expanded by ExpandPostRAPseudos.
// Scalar FP zero pseudos (FR32X/FR64X), expanded post-RA (see comment above).
468 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
469 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
470 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
471 [(set FR32X:$dst, fp32imm0)]>;
472 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
473 [(set FR64X:$dst, fpimm0)]>;
476 //===----------------------------------------------------------------------===//
477 // AVX-512 - VECTOR INSERT
480 // Supports two different pattern operators for mask and unmasked ops. Allows
481 // null_frag to be passed for one.
482 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
484 SDPatternOperator vinsert_insert,
485 SDPatternOperator vinsert_for_mask,
486 X86FoldableSchedWrite sched> {
487 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Register form: insert From.RC:$src2 into To.RC:$src1 at immediate $src3.
488 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
489 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
490 "vinsert" # From.EltTypeName # "x" # From.NumElts,
491 "$src3, $src2, $src1", "$src1, $src2, $src3",
492 (vinsert_insert:$src3 (To.VT To.RC:$src1),
493 (From.VT From.RC:$src2),
495 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
496 (From.VT From.RC:$src2),
498 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
// Memory form: the inserted subvector is loaded via From.LdFrag.
500 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
501 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
502 "vinsert" # From.EltTypeName # "x" # From.NumElts,
503 "$src3, $src2, $src1", "$src1, $src2, $src3",
504 (vinsert_insert:$src3 (To.VT To.RC:$src1),
505 (From.VT (From.LdFrag addr:$src2)),
507 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
508 (From.VT (From.LdFrag addr:$src2)),
509 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
510 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
511 Sched<[sched.Folded, sched.ReadAfterFold]>;
515 // Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper: same pattern operator for masked and unmasked ops.
516 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
518 SDPatternOperator vinsert_insert,
519 X86FoldableSchedWrite sched> :
520 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Selection-only patterns that map an insert with alternative (bitcast)
// element types onto an existing VINSERT* instruction named by InstrStr.
522 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
523 X86VectorVTInfo To, PatFrag vinsert_insert,
524 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
525 let Predicates = p in {
// Register-source insert -> InstrStr"rr".
526 def : Pat<(vinsert_insert:$ins
527 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
528 (To.VT (!cast<Instruction>(InstrStr#"rr")
529 To.RC:$src1, From.RC:$src2,
530 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Load-source insert -> InstrStr"rm".
532 def : Pat<(vinsert_insert:$ins
534 (From.VT (From.LdFrag addr:$src2)),
536 (To.VT (!cast<Instruction>(InstrStr#"rm")
537 To.RC:$src1, addr:$src2,
538 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates all VINSERT size variants (32x4/64x4 and the DQI-only
// 64x2/32x8 forms) for one float/int opcode pair.
542 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
543 ValueType EltVT64, int Opcode256,
544 X86FoldableSchedWrite sched> {
546 let Predicates = [HasVLX] in
547 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
548 X86VectorVTInfo< 4, EltVT32, VR128X>,
549 X86VectorVTInfo< 8, EltVT32, VR256X>,
550 vinsert128_insert, sched>, EVEX_V256;
552 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
553 X86VectorVTInfo< 4, EltVT32, VR128X>,
554 X86VectorVTInfo<16, EltVT32, VR512>,
555 vinsert128_insert, sched>, EVEX_V512;
557 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
558 X86VectorVTInfo< 4, EltVT64, VR256X>,
559 X86VectorVTInfo< 8, EltVT64, VR512>,
560 vinsert256_insert, sched>, VEX_W, EVEX_V512;
562 // Even with DQI we'd like to only use these instructions for masking.
563 let Predicates = [HasVLX, HasDQI] in
564 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
565 X86VectorVTInfo< 2, EltVT64, VR128X>,
566 X86VectorVTInfo< 4, EltVT64, VR256X>,
567 null_frag, vinsert128_insert, sched>,
570 // Even with DQI we'd like to only use these instructions for masking.
571 let Predicates = [HasDQI] in {
572 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
573 X86VectorVTInfo< 2, EltVT64, VR128X>,
574 X86VectorVTInfo< 8, EltVT64, VR512>,
575 null_frag, vinsert128_insert, sched>,
578 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
579 X86VectorVTInfo< 8, EltVT32, VR256X>,
580 X86VectorVTInfo<16, EltVT32, VR512>,
581 null_frag, vinsert256_insert, sched>,
586 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
// Float inserts use opcodes 0x18/0x1a, integer inserts 0x38/0x3a.
587 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
588 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
590 // Codegen pattern with the alternative types,
591 // Even with AVX512DQ we'll still use these for unmasked operations.
592 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
593 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
594 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
595 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
597 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
598 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
599 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
600 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
602 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
603 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
604 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
605 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
607 // Codegen pattern with the alternative types insert VEC128 into VEC256
608 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
609 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
610 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
611 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
612 // Codegen pattern with the alternative types insert VEC128 into VEC512
613 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
614 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
615 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
616 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
617 // Codegen pattern with the alternative types insert VEC256 into VEC512
618 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
619 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
620 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
621 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Masked-select patterns where the mask's type (Cast) differs from the
// insert's To type: matches a vselect over a bitcast insert and emits the
// rrk/rmk (merge) and rrkz/rmkz (zeroing) forms of InstrStr.
624 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
625 X86VectorVTInfo To, X86VectorVTInfo Cast,
626 PatFrag vinsert_insert,
627 SDNodeXForm INSERT_get_vinsert_imm,
629 let Predicates = p in {
// Merge-masking, register source -> InstrStr"rrk".
631 (vselect Cast.KRCWM:$mask,
633 (vinsert_insert:$ins (To.VT To.RC:$src1),
634 (From.VT From.RC:$src2),
637 (!cast<Instruction>(InstrStr#"rrk")
638 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
639 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masking, load source -> InstrStr"rmk".
641 (vselect Cast.KRCWM:$mask,
643 (vinsert_insert:$ins (To.VT To.RC:$src1),
646 (From.LdFrag addr:$src2))),
649 (!cast<Instruction>(InstrStr#"rmk")
650 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
651 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, register source -> InstrStr"rrkz".
654 (vselect Cast.KRCWM:$mask,
656 (vinsert_insert:$ins (To.VT To.RC:$src1),
657 (From.VT From.RC:$src2),
660 (!cast<Instruction>(InstrStr#"rrkz")
661 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
662 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masking, load source -> InstrStr"rmkz".
664 (vselect Cast.KRCWM:$mask,
666 (vinsert_insert:$ins (To.VT To.RC:$src1),
667 (From.VT (From.LdFrag addr:$src2)),
670 (!cast<Instruction>(InstrStr#"rmkz")
671 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
672 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Instantiations of vinsert_for_mask_cast for every (From, To, Cast-mask)
// type combination; 256-bit targets first, then 512-bit, grouped by the
// instruction used.
676 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
677 v8f32x_info, vinsert128_insert,
678 INSERT_get_vinsert128_imm, [HasVLX]>;
679 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
680 v4f64x_info, vinsert128_insert,
681 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
683 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
684 v8i32x_info, vinsert128_insert,
685 INSERT_get_vinsert128_imm, [HasVLX]>;
686 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
687 v8i32x_info, vinsert128_insert,
688 INSERT_get_vinsert128_imm, [HasVLX]>;
689 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
690 v8i32x_info, vinsert128_insert,
691 INSERT_get_vinsert128_imm, [HasVLX]>;
692 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
693 v4i64x_info, vinsert128_insert,
694 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
695 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
696 v4i64x_info, vinsert128_insert,
697 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
698 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
699 v4i64x_info, vinsert128_insert,
700 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
702 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
703 v16f32_info, vinsert128_insert,
704 INSERT_get_vinsert128_imm, [HasAVX512]>;
705 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
706 v8f64_info, vinsert128_insert,
707 INSERT_get_vinsert128_imm, [HasDQI]>;
709 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
710 v16i32_info, vinsert128_insert,
711 INSERT_get_vinsert128_imm, [HasAVX512]>;
712 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
713 v16i32_info, vinsert128_insert,
714 INSERT_get_vinsert128_imm, [HasAVX512]>;
715 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
716 v16i32_info, vinsert128_insert,
717 INSERT_get_vinsert128_imm, [HasAVX512]>;
718 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
719 v8i64_info, vinsert128_insert,
720 INSERT_get_vinsert128_imm, [HasDQI]>;
721 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
722 v8i64_info, vinsert128_insert,
723 INSERT_get_vinsert128_imm, [HasDQI]>;
724 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
725 v8i64_info, vinsert128_insert,
726 INSERT_get_vinsert128_imm, [HasDQI]>;
728 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
729 v16f32_info, vinsert256_insert,
730 INSERT_get_vinsert256_imm, [HasDQI]>;
731 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
732 v8f64_info, vinsert256_insert,
733 INSERT_get_vinsert256_imm, [HasAVX512]>;
735 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
736 v16i32_info, vinsert256_insert,
737 INSERT_get_vinsert256_imm, [HasDQI]>;
738 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
739 v16i32_info, vinsert256_insert,
740 INSERT_get_vinsert256_imm, [HasDQI]>;
741 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
742 v16i32_info, vinsert256_insert,
743 INSERT_get_vinsert256_imm, [HasDQI]>;
744 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
745 v8i64_info, vinsert256_insert,
746 INSERT_get_vinsert256_imm, [HasAVX512]>;
747 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
748 v8i64_info, vinsert256_insert,
749 INSERT_get_vinsert256_imm, [HasAVX512]>;
750 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
751 v8i64_info, vinsert256_insert,
752 INSERT_get_vinsert256_imm, [HasAVX512]>;
754 // vinsertps - insert f32 to XMM
755 let ExeDomain = SSEPackedSingle in {
// Register form: VINSERTPS with an immediate selecting source/dest lanes.
756 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
757 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
758 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
759 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
760 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: loads a single f32 and widens it via scalar_to_vector.
761 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
762 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
763 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
764 [(set VR128X:$dst, (X86insertps VR128X:$src1,
765 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
767 EVEX_4V, EVEX_CD8<32, CD8VT1>,
768 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
771 //===----------------------------------------------------------------------===//
772 // AVX-512 VECTOR EXTRACT
775 // Supports two different pattern operators for mask and unmasked ops. Allows
776 // null_frag to be passed for one.
777 multiclass vextract_for_size_split<int Opcode,
778 X86VectorVTInfo From, X86VectorVTInfo To,
779 SDPatternOperator vextract_extract,
780 SDPatternOperator vextract_for_mask,
781 SchedWrite SchedRR, SchedWrite SchedMR> {
783 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// Register form (with masking variants via AVX512_maskable_split).
784 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
785 (ins From.RC:$src1, u8imm:$idx),
786 "vextract" # To.EltTypeName # "x" # To.NumElts,
787 "$idx, $src1", "$src1, $idx",
788 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
789 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
790 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
// Unmasked extract-to-memory form.
792 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
793 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
794 "vextract" # To.EltTypeName # "x" # To.NumElts #
795 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
796 [(store (To.VT (vextract_extract:$idx
797 (From.VT From.RC:$src1), (iPTR imm))),
// Masked extract-to-memory form: assembly only (empty pattern list).
801 let mayStore = 1, hasSideEffects = 0 in
802 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
803 (ins To.MemOp:$dst, To.KRCWM:$mask,
804 From.RC:$src1, u8imm:$idx),
805 "vextract" # To.EltTypeName # "x" # To.NumElts #
806 "\t{$idx, $src1, $dst {${mask}}|"
807 "$dst {${mask}}, $src1, $idx}", []>,
808 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
812 // Passes the same pattern operator for masked and unmasked ops.
813 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
815 SDPatternOperator vextract_extract,
816 SchedWrite SchedRR, SchedWrite SchedMR> :
817 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
819 // Codegen pattern for the alternative types
820 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
821 X86VectorVTInfo To, PatFrag vextract_extract,
822 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
823 let Predicates = p in {
824 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
825 (To.VT (!cast<Instruction>(InstrStr#"rr")
827 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
828 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
829 (iPTR imm))), addr:$dst),
830 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
831 (EXTRACT_get_vextract_imm To.RC:$ext))>;
835 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
836 ValueType EltVT64, int Opcode256,
837 SchedWrite SchedRR, SchedWrite SchedMR> {
838 let Predicates = [HasAVX512] in {
839 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
840 X86VectorVTInfo<16, EltVT32, VR512>,
841 X86VectorVTInfo< 4, EltVT32, VR128X>,
842 vextract128_extract, SchedRR, SchedMR>,
843 EVEX_V512, EVEX_CD8<32, CD8VT4>;
844 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
845 X86VectorVTInfo< 8, EltVT64, VR512>,
846 X86VectorVTInfo< 4, EltVT64, VR256X>,
847 vextract256_extract, SchedRR, SchedMR>,
848 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
850 let Predicates = [HasVLX] in
851 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
852 X86VectorVTInfo< 8, EltVT32, VR256X>,
853 X86VectorVTInfo< 4, EltVT32, VR128X>,
854 vextract128_extract, SchedRR, SchedMR>,
855 EVEX_V256, EVEX_CD8<32, CD8VT4>;
857 // Even with DQI we'd like to only use these instructions for masking.
858 let Predicates = [HasVLX, HasDQI] in
859 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
860 X86VectorVTInfo< 4, EltVT64, VR256X>,
861 X86VectorVTInfo< 2, EltVT64, VR128X>,
862 null_frag, vextract128_extract, SchedRR, SchedMR>,
863 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
865 // Even with DQI we'd like to only use these instructions for masking.
866 let Predicates = [HasDQI] in {
867 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
868 X86VectorVTInfo< 8, EltVT64, VR512>,
869 X86VectorVTInfo< 2, EltVT64, VR128X>,
870 null_frag, vextract128_extract, SchedRR, SchedMR>,
871 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
872 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
873 X86VectorVTInfo<16, EltVT32, VR512>,
874 X86VectorVTInfo< 8, EltVT32, VR256X>,
875 null_frag, vextract256_extract, SchedRR, SchedMR>,
876 EVEX_V512, EVEX_CD8<32, CD8VT8>;
880 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
881 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
882 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
884 // extract_subvector codegen patterns with the alternative types.
885 // Even with AVX512DQ we'll still use these for unmasked operations.
886 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
887 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
888 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
889 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
891 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
892 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
893 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
894 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
896 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
897 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
898 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
899 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
901 // Codegen pattern with the alternative types extract VEC128 from VEC256
902 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
903 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
904 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
905 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
907 // Codegen pattern with the alternative types extract VEC128 from VEC512
908 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
909 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
910 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
911 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
912 // Codegen pattern with the alternative types extract VEC256 from VEC512
913 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
914 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
915 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
916 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
919 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
920 // smaller extract to enable EVEX->VEX.
921 let Predicates = [NoVLX] in {
922 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
923 (v2i64 (VEXTRACTI128rr
924 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
926 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
927 (v2f64 (VEXTRACTF128rr
928 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
930 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
931 (v4i32 (VEXTRACTI128rr
932 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
934 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
935 (v4f32 (VEXTRACTF128rr
936 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
938 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
939 (v8i16 (VEXTRACTI128rr
940 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
942 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
943 (v16i8 (VEXTRACTI128rr
944 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
948 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
949 // smaller extract to enable EVEX->VEX.
950 let Predicates = [HasVLX] in {
951 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
952 (v2i64 (VEXTRACTI32x4Z256rr
953 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
955 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
956 (v2f64 (VEXTRACTF32x4Z256rr
957 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
959 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
960 (v4i32 (VEXTRACTI32x4Z256rr
961 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
963 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
964 (v4f32 (VEXTRACTF32x4Z256rr
965 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
967 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
968 (v8i16 (VEXTRACTI32x4Z256rr
969 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
971 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
972 (v16i8 (VEXTRACTI32x4Z256rr
973 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
978 // Additional patterns for handling a bitcast between the vselect and the
979 // extract_subvector.
980 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
981 X86VectorVTInfo To, X86VectorVTInfo Cast,
982 PatFrag vextract_extract,
983 SDNodeXForm EXTRACT_get_vextract_imm,
985 let Predicates = p in {
986 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
988 (To.VT (vextract_extract:$ext
989 (From.VT From.RC:$src), (iPTR imm)))),
991 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
992 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
993 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
995 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
997 (To.VT (vextract_extract:$ext
998 (From.VT From.RC:$src), (iPTR imm)))),
1000 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1001 Cast.KRCWM:$mask, From.RC:$src,
1002 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1006 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1007 v4f32x_info, vextract128_extract,
1008 EXTRACT_get_vextract128_imm, [HasVLX]>;
1009 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1010 v2f64x_info, vextract128_extract,
1011 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1013 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1014 v4i32x_info, vextract128_extract,
1015 EXTRACT_get_vextract128_imm, [HasVLX]>;
1016 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1017 v4i32x_info, vextract128_extract,
1018 EXTRACT_get_vextract128_imm, [HasVLX]>;
1019 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1020 v4i32x_info, vextract128_extract,
1021 EXTRACT_get_vextract128_imm, [HasVLX]>;
1022 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1023 v2i64x_info, vextract128_extract,
1024 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1025 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1026 v2i64x_info, vextract128_extract,
1027 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1028 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1029 v2i64x_info, vextract128_extract,
1030 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1032 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1033 v4f32x_info, vextract128_extract,
1034 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1035 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1036 v2f64x_info, vextract128_extract,
1037 EXTRACT_get_vextract128_imm, [HasDQI]>;
1039 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1040 v4i32x_info, vextract128_extract,
1041 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1042 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1043 v4i32x_info, vextract128_extract,
1044 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1045 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1046 v4i32x_info, vextract128_extract,
1047 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1048 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1049 v2i64x_info, vextract128_extract,
1050 EXTRACT_get_vextract128_imm, [HasDQI]>;
1051 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1052 v2i64x_info, vextract128_extract,
1053 EXTRACT_get_vextract128_imm, [HasDQI]>;
1054 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1055 v2i64x_info, vextract128_extract,
1056 EXTRACT_get_vextract128_imm, [HasDQI]>;
1058 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1059 v8f32x_info, vextract256_extract,
1060 EXTRACT_get_vextract256_imm, [HasDQI]>;
1061 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1062 v4f64x_info, vextract256_extract,
1063 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1065 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1066 v8i32x_info, vextract256_extract,
1067 EXTRACT_get_vextract256_imm, [HasDQI]>;
1068 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1069 v8i32x_info, vextract256_extract,
1070 EXTRACT_get_vextract256_imm, [HasDQI]>;
1071 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1072 v8i32x_info, vextract256_extract,
1073 EXTRACT_get_vextract256_imm, [HasDQI]>;
1074 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1075 v4i64x_info, vextract256_extract,
1076 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1077 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1078 v4i64x_info, vextract256_extract,
1079 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1080 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1081 v4i64x_info, vextract256_extract,
1082 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1084 // vextractps - extract 32 bits from XMM
1085 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1086 (ins VR128X:$src1, u8imm:$src2),
1087 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1088 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1089 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1091 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1092 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1093 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1094 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1096 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1098 //===---------------------------------------------------------------------===//
1099 // AVX-512 BROADCAST
1101 // broadcast with a scalar argument.
1102 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1104 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1105 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1106 (!cast<Instruction>(Name#DestInfo.ZSuffix#r)
1107 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1108 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1109 (X86VBroadcast SrcInfo.FRC:$src),
1110 DestInfo.RC:$src0)),
1111 (!cast<Instruction>(Name#DestInfo.ZSuffix#rk)
1112 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1113 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1114 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1115 (X86VBroadcast SrcInfo.FRC:$src),
1116 DestInfo.ImmAllZerosV)),
1117 (!cast<Instruction>(Name#DestInfo.ZSuffix#rkz)
1118 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1121 // Split version to allow mask and broadcast node to be different types. This
1122 // helps support the 32x2 broadcasts.
1123 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1125 SchedWrite SchedRR, SchedWrite SchedRM,
1126 X86VectorVTInfo MaskInfo,
1127 X86VectorVTInfo DestInfo,
1128 X86VectorVTInfo SrcInfo,
1129 SDPatternOperator UnmaskedOp = X86VBroadcast> {
1130 let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
1131 defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1132 (outs MaskInfo.RC:$dst),
1133 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1137 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1141 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
1142 T8PD, EVEX, Sched<[SchedRR]>;
1144 defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1145 (outs MaskInfo.RC:$dst),
1146 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1149 (DestInfo.VT (UnmaskedOp
1150 (SrcInfo.ScalarLdFrag addr:$src))))),
1153 (DestInfo.VT (X86VBroadcast
1154 (SrcInfo.ScalarLdFrag addr:$src)))))>,
1155 T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
1159 def : Pat<(MaskInfo.VT
1161 (DestInfo.VT (UnmaskedOp
1162 (SrcInfo.VT (scalar_to_vector
1163 (SrcInfo.ScalarLdFrag addr:$src))))))),
1164 (!cast<Instruction>(Name#MaskInfo.ZSuffix#m) addr:$src)>;
1165 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1169 (SrcInfo.VT (scalar_to_vector
1170 (SrcInfo.ScalarLdFrag addr:$src)))))),
1171 MaskInfo.RC:$src0)),
1172 (!cast<Instruction>(Name#DestInfo.ZSuffix#mk)
1173 MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
1174 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1178 (SrcInfo.VT (scalar_to_vector
1179 (SrcInfo.ScalarLdFrag addr:$src)))))),
1180 MaskInfo.ImmAllZerosV)),
1181 (!cast<Instruction>(Name#MaskInfo.ZSuffix#mkz)
1182 MaskInfo.KRCWM:$mask, addr:$src)>;
1185 // Helper class to force mask and broadcast result to same type.
1186 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1187 SchedWrite SchedRR, SchedWrite SchedRM,
1188 X86VectorVTInfo DestInfo,
1189 X86VectorVTInfo SrcInfo> :
1190 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1191 DestInfo, DestInfo, SrcInfo>;
1193 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1194 AVX512VLVectorVTInfo _> {
1195 let Predicates = [HasAVX512] in {
1196 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1197 WriteFShuffle256Ld, _.info512, _.info128>,
1198 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1203 let Predicates = [HasVLX] in {
1204 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1205 WriteFShuffle256Ld, _.info256, _.info128>,
1206 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1212 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1213 AVX512VLVectorVTInfo _> {
1214 let Predicates = [HasAVX512] in {
1215 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1216 WriteFShuffle256Ld, _.info512, _.info128>,
1217 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1222 let Predicates = [HasVLX] in {
1223 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1224 WriteFShuffle256Ld, _.info256, _.info128>,
1225 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1228 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1229 WriteFShuffle256Ld, _.info128, _.info128>,
1230 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1235 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1237 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1238 avx512vl_f64_info>, VEX_W1X;
1240 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1241 X86VectorVTInfo _, SDPatternOperator OpNode,
1242 RegisterClass SrcRC> {
1243 let ExeDomain = _.ExeDomain in
1244 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1246 "vpbroadcast"##_.Suffix, "$src", "$src",
1247 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX,
1251 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1252 X86VectorVTInfo _, SDPatternOperator OpNode,
1253 RegisterClass SrcRC, SubRegIndex Subreg> {
1254 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1255 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1256 (outs _.RC:$dst), (ins GR32:$src),
1257 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1258 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1259 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1260 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1262 def : Pat <(_.VT (OpNode SrcRC:$src)),
1263 (!cast<Instruction>(Name#r)
1264 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1266 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1267 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1268 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1270 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1271 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1272 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1275 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1276 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1277 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1278 let Predicates = [prd] in
1279 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1280 OpNode, SrcRC, Subreg>, EVEX_V512;
1281 let Predicates = [prd, HasVLX] in {
1282 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1283 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1284 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1285 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1289 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1290 SDPatternOperator OpNode,
1291 RegisterClass SrcRC, Predicate prd> {
1292 let Predicates = [prd] in
1293 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1295 let Predicates = [prd, HasVLX] in {
1296 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1298 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1303 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1304 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1305 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1306 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1308 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1309 X86VBroadcast, GR32, HasAVX512>;
1310 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1311 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1313 // Provide aliases for broadcast from the same register class that
1314 // automatically does the extract.
1315 multiclass avx512_int_broadcast_rm_lowering<string Name,
1316 X86VectorVTInfo DestInfo,
1317 X86VectorVTInfo SrcInfo,
1318 X86VectorVTInfo ExtInfo> {
1319 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1320 (!cast<Instruction>(Name#DestInfo.ZSuffix#"r")
1321 (ExtInfo.VT (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm)))>;
1324 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1325 AVX512VLVectorVTInfo _, Predicate prd> {
1326 let Predicates = [prd] in {
1327 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1328 WriteShuffle256Ld, _.info512, _.info128>,
1329 avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info256, _.info128>,
1331 // Defined separately to avoid redefinition.
1332 defm Z_Alt : avx512_int_broadcast_rm_lowering<NAME, _.info512, _.info512, _.info128>;
1334 let Predicates = [prd, HasVLX] in {
1335 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1336 WriteShuffle256Ld, _.info256, _.info128>,
1337 avx512_int_broadcast_rm_lowering<NAME, _.info256, _.info256, _.info128>,
1339 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1340 WriteShuffleXLd, _.info128, _.info128>,
1345 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1346 avx512vl_i8_info, HasBWI>;
1347 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1348 avx512vl_i16_info, HasBWI>;
1349 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1350 avx512vl_i32_info, HasAVX512>;
1351 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1352 avx512vl_i64_info, HasAVX512>, VEX_W1X;
1354 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1355 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1356 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1357 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1358 (_Dst.VT (X86SubVBroadcast
1359 (_Src.VT (_Src.LdFrag addr:$src))))>,
1360 Sched<[SchedWriteShuffle.YMM.Folded]>,
1364 // This should be used for the AVX512DQ broadcast instructions. It disables
1365 // the unmasked patterns so that we only use the DQ instructions when masking
1367 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1368 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1369 let hasSideEffects = 0, mayLoad = 1 in
1370 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1371 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1373 (_Dst.VT (X86SubVBroadcast
1374 (_Src.VT (_Src.LdFrag addr:$src))))>,
1375 Sched<[SchedWriteShuffle.YMM.Folded]>,
1379 let Predicates = [HasAVX512] in {
1380 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1381 def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1382 (VPBROADCASTQZm addr:$src)>;
1385 let Predicates = [HasVLX] in {
1386 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1387 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1388 (VPBROADCASTQZ128m addr:$src)>;
1389 def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1390 (VPBROADCASTQZ256m addr:$src)>;
1392 let Predicates = [HasVLX, HasBWI] in {
1393 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1394 // This means we'll encounter truncated i32 loads; match that here.
1395 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1396 (VPBROADCASTWZ128m addr:$src)>;
1397 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1398 (VPBROADCASTWZ256m addr:$src)>;
1399 def : Pat<(v8i16 (X86VBroadcast
1400 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1401 (VPBROADCASTWZ128m addr:$src)>;
1402 def : Pat<(v16i16 (X86VBroadcast
1403 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1404 (VPBROADCASTWZ256m addr:$src)>;
1407 //===----------------------------------------------------------------------===//
1408 // AVX-512 BROADCAST SUBVECTORS
1411 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1412 v16i32_info, v4i32x_info>,
1413 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1414 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1415 v16f32_info, v4f32x_info>,
1416 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1417 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1418 v8i64_info, v4i64x_info>, VEX_W,
1419 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1420 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1421 v8f64_info, v4f64x_info>, VEX_W,
1422 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1424 let Predicates = [HasAVX512] in {
1425 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1426 (VBROADCASTF64X4rm addr:$src)>;
1427 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1428 (VBROADCASTI64X4rm addr:$src)>;
1429 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1430 (VBROADCASTI64X4rm addr:$src)>;
1431 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1432 (VBROADCASTI64X4rm addr:$src)>;
1434 // Provide fallback in case the load node that is used in the patterns above
1435 // is used by additional users, which prevents the pattern selection.
1436 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1437 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1438 (v4f64 VR256X:$src), 1)>;
1439 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1440 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1441 (v8f32 VR256X:$src), 1)>;
1442 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1443 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1444 (v4i64 VR256X:$src), 1)>;
1445 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1446 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1447 (v8i32 VR256X:$src), 1)>;
1448 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1449 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1450 (v16i16 VR256X:$src), 1)>;
1451 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1452 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1453 (v32i8 VR256X:$src), 1)>;
1455 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1456 (VBROADCASTF32X4rm addr:$src)>;
1457 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1458 (VBROADCASTI32X4rm addr:$src)>;
1459 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1460 (VBROADCASTI32X4rm addr:$src)>;
1461 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1462 (VBROADCASTI32X4rm addr:$src)>;
1464 // Patterns for selects of bitcasted operations.
1465 def : Pat<(vselect VK16WM:$mask,
1466 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1467 (bc_v16f32 (v16i32 immAllZerosV))),
1468 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1469 def : Pat<(vselect VK16WM:$mask,
1470 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1472 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1473 def : Pat<(vselect VK16WM:$mask,
1474 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1475 (v16i32 immAllZerosV)),
1476 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1477 def : Pat<(vselect VK16WM:$mask,
1478 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1480 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1482 def : Pat<(vselect VK8WM:$mask,
1483 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1484 (bc_v8f64 (v16i32 immAllZerosV))),
1485 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1486 def : Pat<(vselect VK8WM:$mask,
1487 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1489 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1490 def : Pat<(vselect VK8WM:$mask,
1491 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1492 (bc_v8i64 (v16i32 immAllZerosV))),
1493 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1494 def : Pat<(vselect VK8WM:$mask,
1495 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1497 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1500 let Predicates = [HasVLX] in {
1501 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1502 v8i32x_info, v4i32x_info>,
1503 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1504 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1505 v8f32x_info, v4f32x_info>,
1506 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1508 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1509 (VBROADCASTF32X4Z256rm addr:$src)>;
1510 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1511 (VBROADCASTI32X4Z256rm addr:$src)>;
1512 def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1513 (VBROADCASTI32X4Z256rm addr:$src)>;
1514 def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1515 (VBROADCASTI32X4Z256rm addr:$src)>;
1517 // Patterns for selects of bitcasted operations.
1518 def : Pat<(vselect VK8WM:$mask,
1519 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1520 (bc_v8f32 (v8i32 immAllZerosV))),
1521 (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1522 def : Pat<(vselect VK8WM:$mask,
1523 (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1525 (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
1526 def : Pat<(vselect VK8WM:$mask,
1527 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1528 (v8i32 immAllZerosV)),
1529 (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
1530 def : Pat<(vselect VK8WM:$mask,
1531 (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1533 (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
// Each pattern synthesizes a 128->256-bit subvector broadcast from a register
// source by inserting the same XMM value into both halves of a YMM register.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
let Predicates = [HasVLX, HasDQI] in {
// 64x2 subvector broadcasts into 256-bit registers (DQI + VLX forms).
// NOTE(review): the "Z128" name suffix with EVEX_V256 looks inconsistent but
// is what the code says — confirm against the naming used by sibling defms.
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Patterns for selects of bitcasted operations.
// Zero-masked fp 64x2 subvector broadcast via bitcast from a 32x4 load shape.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v4f64 (v8i32 immAllZerosV))),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
// Merge-masked fp 64x2 subvector broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v4f64 VR256X:$src0)),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
// Zero-masked integer 64x2 subvector broadcast via bitcast from 32x4 shape.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (bc_v4i64 (v8i32 immAllZerosV))),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
// Merge-masked integer 64x2 subvector broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK4WM:$mask,
                   (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v4i64 VR256X:$src0)),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
let Predicates = [HasDQI] in {
// 512-bit DQI-only subvector broadcasts: 64x2 and 32x8 element groupings.
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Patterns for selects of bitcasted operations.
// Zero-masked fp 32x8 subvector broadcast via bitcast from a 64x4 load shape.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (bc_v16f32 (v16i32 immAllZerosV))),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
// Merge-masked fp 32x8 subvector broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                   (v16f32 VR512:$src0)),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// Zero-masked integer 32x8 subvector broadcast via bitcast from 64x4 shape.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
// Merge-masked integer 32x8 subvector broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK16WM:$mask,
                   (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                   (v16i32 VR512:$src0)),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
// Zero-masked fp 64x2 -> 512-bit broadcast via bitcast from a 32x4 load shape.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (bc_v8f64 (v16i32 immAllZerosV))),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
// Merge-masked fp 64x2 -> 512-bit broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                   (v8f64 VR512:$src0)),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// Zero-masked integer 64x2 -> 512-bit broadcast via bitcast from 32x4 shape.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (bc_v8i64 (v16i32 immAllZerosV))),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
// Merge-masked integer 64x2 -> 512-bit broadcast. Restored the dropped select
// false operand (`$src0` passthru) to mirror the rmkz pattern above.
def : Pat<(vselect VK8WM:$mask,
                   (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                   (v8i64 VR512:$src0)),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 32x2 (two-dword) element broadcast, 512- and 256-bit forms.
// Restored the dropped trailing EVEX width classes and the multiclass
// closing brace (the defm bodies ended mid-instantiation).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
// Integer-only extension of the 32x2 broadcast: adds the 128-bit form.
// Restored the dropped trailing EVEX_V128 class and the closing brace.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
    avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}
// Instantiate the 32x2 broadcasts; the integer variant also gets the
// 128-bit form via avx512_common_broadcast_i32x2.
defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                      avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                     avx512vl_f32_info, avx512vl_f64_info>;
// Broadcast lane 0 of a YMM register: extract the XMM subreg and use the
// register form of VBROADCASTSS/SD. Restored the dropped closing brace of
// the `let Predicates` scope.
let Predicates = [HasVLX] in {
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZ256r (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;
def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZ256r (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
}
// 512-bit register-source broadcasts: extract the low XMM and broadcast it.
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
          (VBROADCASTSSZr (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))>;

def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))>;
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
          (VBROADCASTSDZr (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))>;
1682 //===----------------------------------------------------------------------===//
1683 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Broadcast a mask register's bits into a vector register (VPBROADCASTM*).
// Register-only form; no memory variant exists. Restored the dropped
// multiclass closing brace.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}
// Size-dispatch wrapper for VPBROADCASTM*: 512-bit needs CDI, the 256/128-bit
// forms additionally need VLX. Restored the dropped closing braces for the
// inner `let` scope and the multiclass.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
  defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
  defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
  defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
// Mask -> vector broadcasts: mw2d expands a 16-bit mask into dword lanes,
// mb2q expands an 8-bit mask into qword lanes.
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
1708 //===----------------------------------------------------------------------===//
1709 // -- VPERMI2 - 3 source operands form --
// VPERMI2: 3-source permute where $src1 is both the index vector and the
// tied destination. The _cast variant allows the index VT to differ from
// the data VT (used by the fp forms).
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
  // Register-register form.
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: full-width load folded into $src3.
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
           (ins _.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                  (_.VT (_.LdFrag addr:$src3)))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): a `let mayLoad = 1 in` before the rm form and the closing
  // braces for the `let` scope and multiclass appear to have been dropped
  // here — restore before building.
// VPERMI2, broadcast-memory form: the scalar at $src3 is splatted across
// all lanes (EVEX_B embedded-broadcast encoding).
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): the multiclass closing brace appears to have been dropped
  // here — restore before building.
// Instantiate VPERMI2 at all three vector widths; 128/256-bit need VLX.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  // NOTE(review): the closing braces for the `let` scope and the multiclass
  // appear to have been dropped here — restore before building.
// Byte/word VPERMI2 instantiation gated by a feature predicate (BWI/VBMI);
// no broadcast-memory forms for sub-dword elements. Restored the dropped
// `Predicate Prd> {` header fragment (the `Prd` parameter is used below and
// declared the same way in the parallel avx512_perm_t_sizes_bw) and the
// closing braces.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
// VPERMI2 instantiations for dword/qword (always available) and word (BWI).
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
// VPERMI2B (VBMI). Restored the dropped trailing EVEX_CD8 class — byte
// element size, full-vector tuple, and no VEX_W (matching the byte-element
// convention of the VPCMPEQB/VPCMPGTB defms below, which use
// EVEX_CD8<8, CD8VF>).
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
// Floating-point VPERMI2 forms; data is fp but the index vector is integer.
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// Each pattern matches a masked VPERMI2 whose index/passthru arrives through
// a bitcast from CastVT (e.g. vXi64 due to ABI) and selects the rrk/rmk/rmbk
// masked instruction directly.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  // Register form, merge-masked.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 (_.VT _.RC:$src2),
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  // Memory form, merge-masked.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.LdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  // Broadcast-memory form, merge-masked.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  // NOTE(review): the multiclass closing brace appears to have been dropped
  // here — restore before building.
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
// Apply the bitcast-lowering patterns to the fp VPERMI2PS family at all widths.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2: 3-source permute where $src1 is the tied data operand and $src2
// is the index vector (the transposed form of VPERMI2).
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  // Register-register form.
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.RC:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched]>;

  // Register-memory form: full-width load folded into $src3.
  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
           (ins IdxVT.RC:$src2, _.MemOp:$src3),
           OpcodeStr, "$src3, $src2", "$src2, $src3",
           (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                  (_.LdFrag addr:$src3))), 1>,
           EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): the closing braces for the `let` scope and the multiclass
  // appear to have been dropped here — restore before building.
// VPERMT2, broadcast-memory form (EVEX_B embedded-broadcast of $src3).
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): the multiclass closing brace appears to have been dropped
  // here — restore before building.
// Instantiate VPERMT2 at all three vector widths; 128/256-bit need VLX.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               ShuffleMask.info128>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                  ShuffleMask.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               ShuffleMask.info256>,
                 avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                  ShuffleMask.info256>, EVEX_V256;
  // NOTE(review): the closing braces for the `let` scope and the multiclass
  // appear to have been dropped here — restore before building.
// Byte/word VPERMT2 instantiation gated by a feature predicate (BWI/VBMI);
// no broadcast-memory forms for sub-dword elements. Restored the dropped
// closing braces for the `let` scope and the multiclass.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
  defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                               Idx.info128>, EVEX_V128;
  defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                               Idx.info256>, EVEX_V256;
  }
}
// VPERMT2 instantiations for dword/qword (always available) and word (BWI).
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
// VPERMT2B (VBMI). Restored the dropped trailing EVEX_CD8 class — byte
// element size, full-vector tuple, no VEX_W — consistent with the other
// byte-element defms in this file (e.g. VPCMPEQB's EVEX_CD8<8, CD8VF>).
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
// Floating-point VPERMT2 forms; data is fp but the index vector is integer.
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1912 //===----------------------------------------------------------------------===//
1913 // AVX-512 - BLEND using mask
// Mask-driven blend (VBLENDM*/VPBLENDM*). All pattern lists are empty:
// these are selected through VSELECT lowering rather than ISel patterns,
// hence hasSideEffects = 0.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  // NOTE(review): the closing braces for the `mayLoad` scope, the `let`
  // scope and the multiclass appear to have been dropped here — restore
  // before building.
// Broadcast-memory (EVEX_B) forms of the mask-driven blend. Empty pattern
// lists for the same reason as above (selected via VSELECT lowering).
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  // NOTE(review): the closing braces for the `let` scope and the multiclass
  // appear to have been dropped here — restore before building.
// Dword/qword blend instantiation: 512-bit plus (under VLX) 256/128-bit,
// each with register and broadcast-memory forms. Restored the dropped
// trailing EVEX_V512/V256/V128 classes and the closing braces.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;
  let Predicates = [HasVLX] in {
  defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}
// Byte/word blend instantiation (BWI-gated); no broadcast forms for
// sub-dword elements. Restored the dropped trailing EVEX_V512/V256/V128
// classes and the closing braces.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
  defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
              EVEX_V256;
  defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
              EVEX_V128;
  }
}
// Blend instantiations. Restored the dropped VTInfo argument lines for
// VBLENDMPS (f32), VPBLENDMD (i32) and VPBLENDMB (i8), inferred from the
// mnemonics and the element-info pairing of the surviving sibling defms
// (PD=f64, MQ=i64, MW=i16, each carrying VEX_W).
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
2028 //===----------------------------------------------------------------------===//
2029 // Compare Instructions
2030 //===----------------------------------------------------------------------===//
2032 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar compare (VCMPSS/VCMPSD) producing a mask: intrinsic (_Int) forms,
// a SAE (suppress-all-exceptions) form, asm-parser-only explicit-immediate
// aliases, and codegen-only FRC-register forms.
// NOTE(review): several continuation lines in this multiclass appear to have
// been dropped (e.g. the `(outs _.KRC:$dst)` dags, the second compare
// operand of rr_Int/rrb_Int/rr, some mnemonic-string lines, and the closing
// braces) — restore from a pristine copy before building.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
                             X86FoldableSchedWrite sched> {
  // Intrinsic register form.
  defm  rr_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                      (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                      "vcmp${cc}"#_.Suffix,
                      "$src2, $src1", "$src1, $src2",
                      (OpNode (_.VT _.RC:$src1),
                      imm:$cc)>, EVEX_4V, Sched<[sched]>;
  // Intrinsic memory form (scalar load folded into $src2).
  defm  rm_Int  : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
                    "vcmp${cc}"#_.Suffix,
                    "$src2, $src1", "$src1, $src2",
                    (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
                        imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>;

  // SAE form: {sae} suppresses floating-point exceptions.
  defm  rrb_Int  : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
                     "vcmp${cc}"#_.Suffix,
                     "{sae}, $src2, $src1", "$src1, $src2, {sae}",
                     (OpNodeRnd (_.VT _.RC:$src1),
                                (i32 FROUND_NO_EXC))>,
                     EVEX_4V, EVEX_B, Sched<[sched]>;
  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    defm  rri_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V,
                        Sched<[sched]>, NotMemoryFoldable;

    defm  rmi_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
                        (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                        "$cc, $src2, $src1", "$src1, $src2, $cc">,
                        EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

    defm  rrb_alt  : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
                        (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                        "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
                        EVEX_4V, EVEX_B, Sched<[sched]>, NotMemoryFoldable;
  }// let isAsmParserOnly = 1, hasSideEffects = 0

  // Codegen-only forms operating on plain FP registers (FRC).
  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
                !strconcat("vcmp${cc}", _.Suffix,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                EVEX_4V, Sched<[sched]>;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
              (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
              !strconcat("vcmp${cc}", _.Suffix,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
              [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                        (_.ScalarLdFrag addr:$src2),
              EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the scalar compares for f32 (CMPSS) and f64 (CMPSD).
// Restored the dropped closing brace of the `let Predicates` scope.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
// Packed integer compare writing a mask register. IsCommutable marks
// equality-style compares so a load can be matched in either operand.
// Restored the dropped `bit IsCommutable = 0> {` header fragment (the
// parameter is used in the body and declared identically in the sibling
// avx512_icmp_packed_rmb_vl at this file's later lines) and the closing
// brace.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
                              bit IsCommutable = 0> {
  let isCommutable = IsCommutable in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>,
             EVEX_4V, Sched<[sched]>;
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2))))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                 (OpNode (_.VT _.RC:$src1),
                                         (_.VT (_.LdFrag addr:$src2)))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Adds embedded-broadcast (EVEX_B) compare forms on top of
// avx512_icmp_packed. Restored the dropped `bit IsCommutable = 0> :` header
// fragment (IsCommutable is forwarded to the base class on the next line),
// the `(X86VBroadcast` line in the rmbk pattern (matching the rmb pattern
// just above), and the closing brace.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable = 0> :
           avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                                    "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
                                 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                  (OpNode (_.VT _.RC:$src1),
                                          (X86VBroadcast
                                           (_.ScalarLdFrag addr:$src2)))))]>,
               EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Width dispatch for the packed compares: 512-bit under `prd`, 256/128-bit
// additionally under VLX. Restored the dropped closing braces for the `let`
// scope and the multiclass.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
// Width dispatch for the broadcast-capable packed compares. Restored the
// dropped closing braces for the `let` scope and the multiclass.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     PatFrag OpNode, X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
// SETCC marked SDNPCommutative so the equality fragment can fold a load on
// either side; the greater-than fragment uses plain (non-commutable) setcc.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
                           (X86setcc_commute node:$src1, node:$src2, SETEQ)>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
// Restored the dropped closing brace of the `let AddedComplexity` scope.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
// VPCMP with an explicit condition-code operand: Frag matches the compare
// with the cc captured via `Frag:$cc`, CommFrag provides the commuted form
// used by the load-in-LHS patterns at the bottom.
// NOTE(review): many continuation lines in this multiclass appear to have
// been dropped (second compare operands, `cond` operands, `AVX512ICC:$cc`
// ins-list tails, and the closing brace) — restore from a pristine copy
// before building.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag CommFrag, X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
             !strconcat("vpcmp${cc}", Suffix,
                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set _.KRC:$dst, (_.KVT
                                        (_.VT (_.LdFrag addr:$src2)),
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                 (_.KVT (Frag:$cc (_.VT _.RC:$src1),
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
              !strconcat("vpcmp${cc}", Suffix,
                         "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                          (_.VT (_.LdFrag addr:$src2)),
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Accept explicit immediate argument form instead of comparison code.
  let isAsmParserOnly = 1, hasSideEffects = 0 in {
    def rri_alt : AVX512AIi8<opc, MRMSrcReg,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                   !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                              "$dst, $src1, $src2, $cc}"), []>,
                   EVEX_4V, Sched<[sched]>, NotMemoryFoldable;

    def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
                   (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
                   !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
                              "$dst, $src1, $src2, $cc}"), []>,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
    def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
                    (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                    !strconcat("vpcmp", Suffix,
                               "\t{$cc, $src2, $src1, $dst {${mask}}|",
                               "$dst {${mask}}, $src1, $src2, $cc}"), []>,
                    EVEX_4V, EVEX_K, Sched<[sched]>, NotMemoryFoldable;

    def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
                    (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                    !strconcat("vpcmp", Suffix,
                               "\t{$cc, $src2, $src1, $dst {${mask}}|",
                               "$dst {${mask}}, $src1, $src2, $cc}"), []>,
                    EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>,

  // Commuted-load patterns: when the load is the LHS, use the commuted
  // fragment and swap the condition via CommFrag.OperandTransform.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                    (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
// Extends avx512_icmp_cc with broadcast-from-memory forms (rmib/rmibk) of the
// VPCMP{D,Q}-style integer compares, asm-parser-only variants that take the
// raw comparison immediate, and selection patterns that fold a broadcast load
// in the first operand by commuting the condition code.
// NOTE(review): several continuation lines are elided in this excerpt.
2348 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
2349 PatFrag CommFrag, X86FoldableSchedWrite sched,
2350 X86VectorVTInfo _, string Name> :
2351 avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched, _, Name> {
// Unmasked compare against a broadcast scalar memory operand.
2352 def rmib : AVX512AIi8<opc, MRMSrcMem,
2353 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2355 !strconcat("vpcmp${cc}", Suffix,
2356 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2357 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2358 [(set _.KRC:$dst, (_.KVT (Frag:$cc
2361 (_.ScalarLdFrag addr:$src2)),
2363 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Zero-masked variant: result ANDed with the writemask register.
2364 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2365 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2366 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2367 !strconcat("vpcmp${cc}", Suffix,
2368 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2369 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2370 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2374 (_.ScalarLdFrag addr:$src2)),
2376 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2378 // Accept explicit immediate argument form instead of comparison code.
2379 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2380 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2381 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2383 !strconcat("vpcmp", Suffix,
2384 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2385 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2386 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
2388 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2389 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2390 _.ScalarMemOp:$src2, u8imm:$cc),
2391 !strconcat("vpcmp", Suffix,
2392 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2393 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>,
2394 EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
// Fold a broadcast load appearing as the FIRST operand: select the rmib form
// with the condition code commuted via CommFrag.OperandTransform.
2398 def : Pat<(_.KVT (CommFrag:$cc (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2399 (_.VT _.RC:$src1), cond)),
2400 (!cast<Instruction>(Name#_.ZSuffix#"rmib")
2401 _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;
// Same, under a writemask.
2403 def : Pat<(and _.KRCWM:$mask,
2404 (_.KVT (CommFrag:$cc (X86VBroadcast
2405 (_.ScalarLdFrag addr:$src2)),
2406 (_.VT _.RC:$src1), cond))),
2407 (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
2408 _.KRCWM:$mask, _.RC:$src1, addr:$src2,
2409 (CommFrag.OperandTransform $cc))>;
// Instantiates avx512_icmp_cc at all three vector lengths: 512-bit under
// `prd`, 256/128-bit additionally gated on HasVLX.
2412 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2413 PatFrag CommFrag, X86SchedWriteWidths sched,
2414 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2415 let Predicates = [prd] in
2416 defm Z : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.ZMM,
2417 VTInfo.info512, NAME>, EVEX_V512;
2419 let Predicates = [prd, HasVLX] in {
2420 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.YMM,
2421 VTInfo.info256, NAME>, EVEX_V256;
2422 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, CommFrag, sched.XMM,
2423 VTInfo.info128, NAME>, EVEX_V128;
// Same VL expansion as avx512_icmp_cc_vl, but via avx512_icmp_cc_rmb so the
// element-broadcast memory forms are included (D/Q element sizes).
2427 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2428 PatFrag CommFrag, X86SchedWriteWidths sched,
2429 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2430 let Predicates = [prd] in
2431 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.ZMM,
2432 VTInfo.info512, NAME>, EVEX_V512;
2434 let Predicates = [prd, HasVLX] in {
2435 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.YMM,
2436 VTInfo.info256, NAME>, EVEX_V256;
2437 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, CommFrag, sched.XMM,
2438 VTInfo.info128, NAME>, EVEX_V128;
// Transform a setcc condition code into the corresponding VPCMP immediate.
2442 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2443 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2444 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2445 return getI8Imm(SSECC, SDLoc(N));
2448 // Swapped operand version of the above.
// Produces the VPCMP immediate for the condition with operands exchanged,
// so a load in operand 1 can be folded into the memory form.
2449 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2450 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2451 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2452 SSECC = X86::getSwappedVPCMPImm(SSECC);
2453 return getI8Imm(SSECC, SDLoc(N));
// setcc fragment matching only SIGNED integer comparisons (selects VPCMP).
2456 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2457 (setcc node:$src1, node:$src2, node:$cc), [{
2458 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2459 return !ISD::isUnsignedIntSetCC(CC);
// Signed integer compare fragment whose attached transform emits the
// operand-swapped VPCMP immediate; used when folding a load in operand 1.
def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                               (setcc node:$src1, node:$src2, node:$cc), [{
  return !ISD::isUnsignedIntSetCC(
      cast<CondCodeSDNode>(N->getOperand(2))->get());
}], X86pcmpm_imm_commute>;
// setcc fragment matching only UNSIGNED integer comparisons (selects VPCMPU).
2469 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2470 (setcc node:$src1, node:$src2, node:$cc), [{
2471 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2472 return ISD::isUnsignedIntSetCC(CC);
// Unsigned integer compare fragment; the attached transform emits the
// operand-swapped VPCMP immediate so a load in operand 1 can be folded.
// (The immediate transform is shared with the signed variant.)
def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                                (setcc node:$src1, node:$src2, node:$cc), [{
  return ISD::isUnsignedIntSetCC(
      cast<CondCodeSDNode>(N->getOperand(2))->get());
}], X86pcmpm_imm_commute>;
2482 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Byte/word compares require BWI; only D/Q element sizes get the
// broadcast (_rmb) forms, since EVEX embedded broadcast is 32/64-bit only.
2483 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_commute,
2484 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2486 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_commute,
2487 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2490 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_commute,
2491 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2492 VEX_W, EVEX_CD8<16, CD8VF>;
2493 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_commute,
2494 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2495 VEX_W, EVEX_CD8<16, CD8VF>;
2497 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_commute,
2498 SchedWriteVecALU, avx512vl_i32_info,
2499 HasAVX512>, EVEX_CD8<32, CD8VF>;
2500 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_commute,
2501 SchedWriteVecALU, avx512vl_i32_info,
2502 HasAVX512>, EVEX_CD8<32, CD8VF>;
2504 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_commute,
2505 SchedWriteVecALU, avx512vl_i64_info,
2506 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2507 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_commute,
2508 SchedWriteVecALU, avx512vl_i64_info,
2509 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// Floating-point VCMP forms common to all vector lengths: register,
// full-memory, and broadcast-memory operands, their masked variants (via
// AVX512_maskable_cmp), asm-only explicit-immediate aliases, and patterns
// that commute commutable condition codes to fold a load in operand 1.
// NOTE(review): several continuation lines are elided in this excerpt.
2511 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2513 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2514 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2515 "vcmp${cc}"#_.Suffix,
2516 "$src2, $src1", "$src1, $src2",
2517 (X86cmpm (_.VT _.RC:$src1),
2522 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2523 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2524 "vcmp${cc}"#_.Suffix,
2525 "$src2, $src1", "$src1, $src2",
2526 (X86cmpm (_.VT _.RC:$src1),
2527 (_.VT (_.LdFrag addr:$src2)),
2529 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-from-memory operand form.
2531 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2533 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2534 "vcmp${cc}"#_.Suffix,
2535 "${src2}"##_.BroadcastStr##", $src1",
2536 "$src1, ${src2}"##_.BroadcastStr,
2537 (X86cmpm (_.VT _.RC:$src1),
2538 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2540 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2541 // Accept explicit immediate argument form instead of comparison code.
2542 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2543 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2545 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2547 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2548 Sched<[sched]>, NotMemoryFoldable;
2550 let mayLoad = 1 in {
2551 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2553 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2555 "$cc, $src2, $src1", "$src1, $src2, $cc">,
2556 Sched<[sched.Folded, sched.ReadAfterFold]>,
2559 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2561 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2563 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2564 "$src1, ${src2}"##_.BroadcastStr##", $cc">,
2565 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>,
2570 // Patterns for selecting with loads in other operand.
// Only commutable condition codes (CommutableCMPCC) may swap operands.
2571 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2572 CommutableCMPCC:$cc),
2573 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2576 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2578 CommutableCMPCC:$cc)),
2579 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2580 _.RC:$src1, addr:$src2,
2583 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2584 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2585 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2588 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2589 (_.ScalarLdFrag addr:$src2)),
2591 CommutableCMPCC:$cc)),
2592 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2593 _.RC:$src1, addr:$src2,
// Adds the {sae} (suppress-all-exceptions) register form of VCMP, 512-bit
// only, plus its asm-parser-only explicit-immediate alias.
2597 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2598 // comparison code form (VCMP[EQ/LT/LE/...]
2599 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2600 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2601 "vcmp${cc}"#_.Suffix,
2602 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2603 (X86cmpmRnd (_.VT _.RC:$src1),
2606 (i32 FROUND_NO_EXC))>,
2607 EVEX_B, Sched<[sched]>;
2609 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2610 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2612 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2614 "$cc, {sae}, $src2, $src1",
2615 "$src1, $src2, {sae}, $cc">,
2616 EVEX_B, Sched<[sched]>, NotMemoryFoldable;
// Top-level VCMP expansion: 512-bit (with the SAE form) under HasAVX512,
// 128/256-bit additionally gated on HasVLX.
2620 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2621 let Predicates = [HasAVX512] in {
2622 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2623 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2626 let Predicates = [HasAVX512,HasVLX] in {
2627 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2628 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
// Packed FP compares (opcode 0xC2), double and single precision.
2632 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2633 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2634 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2635 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2637 // Patterns to select fp compares with load as first operand.
// Scalar forms: commute a commutable CC so the load folds into VCMPSS/SD rm.
2638 let Predicates = [HasAVX512] in {
2639 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2640 CommutableCMPCC:$cc)),
2641 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2643 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2644 CommutableCMPCC:$cc)),
2645 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2648 // ----------------------------------------------------------------
2650 //handle fpclass instruction mask = op(reg_scalar,imm)
2651 // op(mem_scalar,imm)
// Scalar VFPCLASS: mask = OpNode(scalar, imm8) for register and memory
// sources, each with an unmasked and a writemask (EVEX_K) variant.
// NOTE(review): a few continuation lines are elided in this excerpt.
2652 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2653 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2655 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2656 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2657 (ins _.RC:$src1, i32u8imm:$src2),
2658 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2659 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2660 (i32 imm:$src2)))]>,
2662 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2663 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2664 OpcodeStr##_.Suffix#
2665 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2666 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2667 (OpNode (_.VT _.RC:$src1),
2668 (i32 imm:$src2))))]>,
2669 EVEX_K, Sched<[sched]>;
2670 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2671 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2672 OpcodeStr##_.Suffix##
2673 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2675 (OpNode _.ScalarIntMemCPat:$src1,
2676 (i32 imm:$src2)))]>,
2677 Sched<[sched.Folded, sched.ReadAfterFold]>;
2678 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2679 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2680 OpcodeStr##_.Suffix##
2681 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2682 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2683 (OpNode _.ScalarIntMemCPat:$src1,
2684 (i32 imm:$src2))))]>,
2685 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2689 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2690 // fpclass(reg_vec, mem_vec, imm)
2691 // fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASS: mask = OpNode(vector, imm8) for register, full-memory and
// broadcast-memory sources; each form has an unmasked and an EVEX_K variant.
// `mem`/`broadcast` are mnemonic suffix strings appended for the memory forms.
2692 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2693 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2694 string mem, string broadcast>{
2695 let ExeDomain = _.ExeDomain in {
2696 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2697 (ins _.RC:$src1, i32u8imm:$src2),
2698 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2699 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2700 (i32 imm:$src2)))]>,
2702 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2703 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2704 OpcodeStr##_.Suffix#
2705 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2706 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2707 (OpNode (_.VT _.RC:$src1),
2708 (i32 imm:$src2))))]>,
2709 EVEX_K, Sched<[sched]>;
2710 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2711 (ins _.MemOp:$src1, i32u8imm:$src2),
2712 OpcodeStr##_.Suffix##mem#
2713 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2714 [(set _.KRC:$dst,(OpNode
2715 (_.VT (_.LdFrag addr:$src1)),
2716 (i32 imm:$src2)))]>,
2717 Sched<[sched.Folded, sched.ReadAfterFold]>;
2718 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2719 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2720 OpcodeStr##_.Suffix##mem#
2721 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2722 [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode
2723 (_.VT (_.LdFrag addr:$src1)),
2724 (i32 imm:$src2))))]>,
2725 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-from-memory source (EVEX_B).
2726 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2727 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2728 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2729 _.BroadcastStr##", $dst|$dst, ${src1}"
2730 ##_.BroadcastStr##", $src2}",
2731 [(set _.KRC:$dst,(OpNode
2732 (_.VT (X86VBroadcast
2733 (_.ScalarLdFrag addr:$src1))),
2734 (i32 imm:$src2)))]>,
2735 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2736 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2737 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2738 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2739 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2740 _.BroadcastStr##", $src2}",
2741 [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode
2742 (_.VT (X86VBroadcast
2743 (_.ScalarLdFrag addr:$src1))),
2744 (i32 imm:$src2))))]>,
2745 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
// VL expansion of the vector fpclass forms; "{z}"/"{y}"/"{x}" are the
// memory-size mnemonic suffixes passed to avx512_vector_fpclass.
2749 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2750 bits<8> opc, SDNode OpNode,
2751 X86SchedWriteWidths sched, Predicate prd,
2753 let Predicates = [prd] in {
2754 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.ZMM,
2755 _.info512, "{z}", broadcast>, EVEX_V512;
2757 let Predicates = [prd, HasVLX] in {
2758 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.XMM,
2759 _.info128, "{x}", broadcast>, EVEX_V128;
2760 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched.YMM,
2761 _.info256, "{y}", broadcast>, EVEX_V256;
// Bundles vector (PS/PD) and scalar (SS/SD) fpclass variants under one name;
// "{l}"/"{q}" are the broadcast mnemonic suffixes for 32/64-bit elements.
2765 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2766 bits<8> opcScalar, SDNode VecOpNode,
2767 SDNode ScalarOpNode, X86SchedWriteWidths sched,
2769 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2770 VecOpNode, sched, prd, "{l}">,
2771 EVEX_CD8<32, CD8VF>;
2772 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2773 VecOpNode, sched, prd, "{q}">,
2774 EVEX_CD8<64, CD8VF> , VEX_W;
2775 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2776 sched.Scl, f32x_info, prd>,
2777 EVEX_CD8<32, CD8VT1>;
2778 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2779 sched.Scl, f64x_info, prd>,
2780 EVEX_CD8<64, CD8VT1>, VEX_W;
// VFPCLASS (DQI): vector opcode 0x66, scalar opcode 0x67.
2783 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2784 X86Vfpclasss, SchedWriteFCmp, HasDQI>,
2785 AVX512AIi8Base, EVEX;
2787 //-----------------------------------------------------------------
2788 // Mask register copy, including
2789 // - copy between mask registers
2790 // - load/store mask registers
2791 // - copy from GPR to mask register and vice versa
// KMOV between mask registers (kk) and memory (km load / mk store).
2793 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2794 string OpcodeStr, RegisterClass KRC,
2795 ValueType vvt, X86MemOperand x86memop> {
2796 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2797 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2798 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2800 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2801 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2802 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2804 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2805 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2806 [(store KRC:$src, addr:$dst)]>,
2807 Sched<[WriteStore]>;
// KMOV between a mask register and a GPR (no patterns; selection is done
// via the COPY_TO_REGCLASS patterns below).
2810 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2812 RegisterClass KRC, RegisterClass GRC> {
2813 let hasSideEffects = 0 in {
2814 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2815 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2817 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2818 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB needs DQI; KMOVW is baseline AVX512; KMOVD/KMOVQ need BWI.
2823 let Predicates = [HasDQI] in
2824 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2825 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2828 let Predicates = [HasAVX512] in
2829 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2830 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2833 let Predicates = [HasBWI] in {
2834 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2836 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2838 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2840 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2844 // GR from/to mask register
// i8/i16 GPRs are widened through a 32-bit register (no 8/16-bit KMOV GPR
// forms); i32/i64 cases are straight register-class copies.
2845 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2846 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2847 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2848 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2850 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2851 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2852 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2853 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext through KMOV?rk zeroes the upper bits; anyext can use a plain copy.
2855 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2856 (KMOVWrk VK16:$src)>;
2857 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2858 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2860 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2861 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2862 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2863 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2865 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2866 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2867 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2868 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2869 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2870 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2871 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2872 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Sub-byte mask load/store: DQI uses KMOVB through a VK8 copy; plain AVX512
// loads a v8i1/v16i1 through a GPR zero-extending load or KMOVW.
2875 let Predicates = [HasDQI] in {
2876 def : Pat<(store VK1:$src, addr:$dst),
2877 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2879 def : Pat<(v1i1 (load addr:$src)),
2880 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2881 def : Pat<(v2i1 (load addr:$src)),
2882 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2883 def : Pat<(v4i1 (load addr:$src)),
2884 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2887 let Predicates = [HasAVX512] in {
2888 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2889 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2890 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2891 (KMOVWkm addr:$src)>;
// EXTRACT_VECTOR_ELT of an i1 vector producing an i8 scalar.
2894 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2895 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2896 SDTCVecEltisVT<1, i1>,
// Lowerings for moving a scalar into/out of element 0 of a mask vector,
// instantiated for every mask width below.
2899 let Predicates = [HasAVX512] in {
2900 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2901 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2902 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2904 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2905 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2907 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2908 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2910 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2911 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2914 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2915 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2916 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2917 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2918 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2919 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2920 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
// Insert a single-bit mask built from a GR8 into a zeroed v16i1.
// NOTE(review): the result lines of this pattern are elided in this excerpt.
2922 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2923 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2926 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2930 // Mask unary operation
// Single register-to-register mask unary instruction (e.g. KNOT), gated by
// `prd`.
2932 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2933 RegisterClass KRC, SDPatternOperator OpNode,
2934 X86FoldableSchedWrite sched, Predicate prd> {
2935 let Predicates = [prd] in
2936 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2937 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2938 [(set KRC:$dst, (OpNode KRC:$src))]>,
// Expands a mask unary op to all four widths with their feature gates:
// B needs DQI, W is baseline, D/Q need BWI.
2942 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2943 SDPatternOperator OpNode,
2944 X86FoldableSchedWrite sched> {
2945 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2946 sched, HasDQI>, VEX, PD;
2947 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2948 sched, HasAVX512>, VEX, PS;
2949 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2950 sched, HasBWI>, VEX, PD, VEX_W;
2951 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2952 sched, HasBWI>, VEX, PS, VEX_W;
2955 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2956 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2958 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2959 let Predicates = [HasAVX512, NoDQI] in
2960 def : Pat<(vnot VK8:$src),
2961 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Sub-byte masks always go through the 16-bit KNOTW.
2963 def : Pat<(vnot VK4:$src),
2964 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2965 def : Pat<(vnot VK2:$src),
2966 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2968 // Mask binary operation
2969 // - KAND, KANDN, KOR, KXNOR, KXOR
// Single reg-reg-reg mask binary instruction; commutability is forwarded
// to the scheduler/coalescer via isCommutable.
2970 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2971 RegisterClass KRC, SDPatternOperator OpNode,
2972 X86FoldableSchedWrite sched, Predicate prd,
2974 let Predicates = [prd], isCommutable = IsCommutable in
2975 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2976 !strconcat(OpcodeStr,
2977 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2978 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// Expands a mask binary op to all four widths; the W form's predicate is
// parameterized (KADD below needs DQI even for the W width).
2982 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2983 SDPatternOperator OpNode,
2984 X86FoldableSchedWrite sched, bit IsCommutable,
2985 Predicate prdW = HasAVX512> {
2986 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2987 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2988 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2989 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2990 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2991 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2992 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2993 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// Scalar and vector forms of AND-NOT and XNOR used by the mask instructions.
2996 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2997 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2998 // These nodes use 'vnot' instead of 'not' to support vectors.
2999 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
3000 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3002 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KANDN is the only non-commutable op; KADD requires DQI for all widths.
3003 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3004 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3005 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3006 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3007 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3008 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Lower mask-register binary ops on narrow mask types by widening both
// operands to VK16 and using the 16-bit (W-form) instruction `Inst`.
//   VOpNode — vector-flavored node (vandn/vxnor) used for the v8i1/v2i1/v4i1
//             cases; OpNode — scalar-flavored node used for the v1i1 case.
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
                            Instruction Inst> {
  // With AVX512F, 8-bit mask is promoted to 16-bit mask,
  // for the DQI set, this type is legal and KxxxB instruction is used
  let Predicates = [NoDQI] in
  def : Pat<(VOpNode VK8:$src1, VK8:$src2),
            (COPY_TO_REGCLASS
              (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
                    (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;

  // All types smaller than 8 bits require conversion anyway
  def : Pat<(OpNode VK1:$src1, VK1:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK1:$src1, VK16),
                           (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
  // Fix: the result register class must match the pattern's result type
  // (v2i1 -> VK2, v4i1 -> VK4); the original copy-pasted VK1 here.
  def : Pat<(VOpNode VK2:$src1, VK2:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK2:$src1, VK16),
                           (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
  def : Pat<(VOpNode VK4:$src1, VK4:$src2),
        (COPY_TO_REGCLASS (Inst
                           (COPY_TO_REGCLASS VK4:$src1, VK16),
                           (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
}
// Instantiate the narrow-mask lowerings over the 16-bit (W) instructions.
3035 defm : avx512_binop_pat<and, and, KANDWrr>;
3036 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
3037 defm : avx512_binop_pat<or, or, KORWrr>;
3038 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
3039 defm : avx512_binop_pat<xor, xor, KXORWrr>;
// KUNPCK: concatenate two narrow masks into one double-width mask.
// Note the operand swap in the pattern: KUNPCK places $src2 in the low half.
3042 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
3043 RegisterClass KRCSrc, X86FoldableSchedWrite sched,
3045 let Predicates = [prd] in {
3046 let hasSideEffects = 0 in
3047 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
3048 (ins KRC:$src1, KRC:$src2),
3049 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3050 VEX_4V, VEX_L, Sched<[sched]>;
3052 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
3053 (!cast<Instruction>(NAME##rr)
3054 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
3055 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
// b+b->w (AVX512F); w+w->d and d+d->q need BWI.
3059 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD;
3060 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS;
3061 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W;
// Mask test instruction (KORTEST/KTEST): no register result, only EFLAGS.
3064 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3065 SDNode OpNode, X86FoldableSchedWrite sched,
3067 let Predicates = [prd], Defs = [EFLAGS] in
3068 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3069 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3070 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
// Expands a mask test op to all four widths (B: DQI, W: parameterized,
// D/Q: BWI).
3074 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3075 X86FoldableSchedWrite sched,
3076 Predicate prdW = HasAVX512> {
3077 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3079 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3081 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3083 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3087 // TODO - do we need a X86SchedWriteWidths::KMASK type?
// KTEST requires DQI even for the W width.
3088 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3089 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// Mask shift-by-immediate instruction (KSHIFTL/KSHIFTR).
3092 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3093 SDNode OpNode, X86FoldableSchedWrite sched> {
3094 let Predicates = [HasAVX512] in
3095 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3096 !strconcat(OpcodeStr,
3097 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3098 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
// Width expansion for mask shifts; note W/B share opc1 while D/Q use opc2.
3102 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3103 SDNode OpNode, X86FoldableSchedWrite sched> {
3104 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3105 sched>, VEX, TAPD, VEX_W;
3106 let Predicates = [HasDQI] in
3107 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3109 let Predicates = [HasBWI] in {
3110 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3111 sched>, VEX, TAPD, VEX_W;
3112 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
// KSHIFTL: opcodes 0x32 (W/B) / 0x33 (Q/D); KSHIFTR: 0x30 / 0x31.
3117 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3118 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3120 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Widens both operands into ZMM via INSERT_SUBREG on IMPLICIT_DEF and runs
// the 512-bit compare; only the low Narrow.NumElts mask bits are meaningful.
// NOTE(review): multiclass name carries an "axv512" typo; it is referenced by
// instantiations outside this excerpt, so it cannot be renamed here.
3121 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
3122 X86VectorVTInfo Narrow,
3123 X86VectorVTInfo Wide> {
3124 def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
3125 (Narrow.VT Narrow.RC:$src2))),
3127 (!cast<Instruction>(InstStr#"Zrr")
3128 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3129 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
// Masked form: widen the mask with COPY_TO_REGCLASS and use the Zrrk variant.
3132 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3133 (Frag (Narrow.VT Narrow.RC:$src1),
3134 (Narrow.VT Narrow.RC:$src2)))),
3136 (!cast<Instruction>(InstStr#"Zrrk")
3137 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3138 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3139 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
3143 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Condition-code variant (VPCMP{B,W,D,Q}/VPCMPU*): Frag captures the CondCode
// as $cc, the wide immediate-compare "Zrri"/"Zrrik" instruction is selected,
// and Frag.OperandTransform maps the captured cond to the cc immediate. The
// wide mask result is copied back down to Narrow.KRC.
3144 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag,
3146 X86VectorVTInfo Narrow,
3147 X86VectorVTInfo Wide> {
3148 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3149 (Narrow.VT Narrow.RC:$src2), cond)),
3151 (!cast<Instruction>(InstStr##Zrri)
3152 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3153 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3154 (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Masked form: the compare result is ANDed with an existing mask, so the
// narrow mask becomes the wide instruction's writemask.
3156 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3157 (Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3158 (Narrow.VT Narrow.RC:$src2),
3160 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3161 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3162 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3163 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3164 (Frag.OperandTransform $cc)), Narrow.KRC)>;
3167 // Same as above, but for fp types which don't use PatFrags.
// OpNode (e.g. X86cmpm) already carries the comparison immediate directly as
// imm:$cc, so no OperandTransform is needed; otherwise identical widening via
// INSERT_SUBREG into an IMPLICIT_DEF 512-bit value, with the masked form using
// the "Zrrik" writemasked instruction.
3168 multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
3169 X86VectorVTInfo Narrow,
3170 X86VectorVTInfo Wide> {
3171 def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
3172 (Narrow.VT Narrow.RC:$src2), imm:$cc)),
3174 (!cast<Instruction>(InstStr##Zrri)
3175 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3176 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3177 imm:$cc), Narrow.KRC)>;
3179 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3180 (OpNode (Narrow.VT Narrow.RC:$src1),
3181 (Narrow.VT Narrow.RC:$src2), imm:$cc))),
3182 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3183 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3184 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3185 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3186 imm:$cc), Narrow.KRC)>;
// When VLX is unavailable, 128/256-bit compares are lowered through their
// 512-bit counterparts using the widening multiclasses above. Covers signed
// GT / EQ (commutable), the cc-immediate integer compares (signed and
// unsigned), and the fp cc compares.
3189 let Predicates = [HasAVX512, NoVLX] in {
3190 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3191 // increase the pattern complexity the way an immediate would.
3192 let AddedComplexity = 2 in {
3193 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
3194 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
3196 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
3197 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
3199 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
3200 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
3202 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
3203 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
// Immediate-condition-code integer compares (signed VPCMP*, unsigned VPCMPU*).
3206 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v8i32x_info, v16i32_info>;
3207 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v8i32x_info, v16i32_info>;
3209 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPD", v4i32x_info, v16i32_info>;
3210 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUD", v4i32x_info, v16i32_info>;
3212 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v4i64x_info, v8i64_info>;
3213 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v4i64x_info, v8i64_info>;
3215 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPQ", v2i64x_info, v8i64_info>;
3216 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUQ", v2i64x_info, v8i64_info>;
// Floating-point cc compares (VCMPPS/VCMPPD).
3218 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
3219 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v4f32x_info, v16f32_info>;
3220 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v4f64x_info, v8f64_info>;
3221 defm : axv512_cmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPD", v2f64x_info, v8f64_info>;
// Same widening lowering for the byte/word element types, which require BWI
// for the 512-bit compare instructions (VPCMP{EQ,GT}{B,W}, VPCMP{B,W},
// VPCMPU{B,W}).
3224 let Predicates = [HasBWI, NoVLX] in {
3225 // AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
3226 // increase the pattern complexity the way an immediate would.
3227 let AddedComplexity = 2 in {
3228 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
3229 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
3231 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
3232 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
3234 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
3235 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
3237 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
3238 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
3241 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v32i8x_info, v64i8_info>;
3242 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v32i8x_info, v64i8_info>;
3244 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPB", v16i8x_info, v64i8_info>;
3245 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUB", v16i8x_info, v64i8_info>;
3247 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v16i16x_info, v32i16_info>;
3248 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v16i16x_info, v32i16_info>;
3250 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, "VPCMPW", v8i16x_info, v32i16_info>;
3251 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, "VPCMPUW", v8i16x_info, v32i16_info>;
3254 // Mask setting all 0s or 1s
// Pseudo that materializes an all-zeros or all-ones mask (Val is
// immAllZerosV/immAllOnesV). Rematerializable and as cheap as a move so the
// register allocator can recreate it instead of spilling; expanded later
// (it is an isPseudo instruction with opcode 0).
3255 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3256 let Predicates = [HasAVX512] in
3257 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3258 SchedRW = [WriteZero] in
3259 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3260 [(set KRC:$dst, (VT Val))]>;
// Instantiates the set-all pseudo for 16/32/64-bit masks, then defines
// KSET0 (all zeros) and KSET1 (all ones).
3263 multiclass avx512_mask_setop_w<PatFrag Val> {
3264 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3265 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3266 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3269 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3270 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3272 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit all-zero/all-one masks reuse the 16-bit KSET0W/KSET1W result,
// copied into the narrower mask class.
3273 let Predicates = [HasAVX512] in {
3274 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3275 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3276 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3277 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3278 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3279 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3280 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3281 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3284 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Since the low bits of a wider mask register are exactly the narrow mask,
// both directions lower to a plain register-class copy (no data movement).
3285 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3286 RegisterClass RC, ValueType VT> {
3287 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3288 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3290 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3291 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the index-0 subvector copies for every (narrow, wide) mask
// width pair from v1i1 up to v64i1.
3293 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3294 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3295 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3296 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3297 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3298 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3300 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3301 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3302 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3303 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3304 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3306 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3307 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3308 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3309 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3311 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3312 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3313 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3315 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3316 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3318 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3320 //===----------------------------------------------------------------------===//
3321 // AVX-512 - Aligned and unaligned load and store
// Defines one width of an AVX-512 vector load: unmasked (rr/rm), zero-masked
// (rrkz/rmkz), and merge-masked (rrk/rmk) forms, plus masked_load (mload)
// selection patterns. SelectOprr lets callers disable the reg-reg select
// patterns (pass null_frag) when another instruction should win; NoRMPattern
// suppresses the plain load pattern. EVEX2VEXOvrd names the VEX instruction
// used when compressing EVEX to VEX.
3324 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3325 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3326 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3327 bit NoRMPattern = 0,
3328 SDPatternOperator SelectOprr = vselect> {
3329 let hasSideEffects = 0 in {
3330 let isMoveReg = 1 in
3331 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3332 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3333 _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3334 EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
// Zero-masking reg-reg form: elements with a clear mask bit become zero.
3335 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3336 (ins _.KRCWM:$mask, _.RC:$src),
3337 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3338 "${dst} {${mask}} {z}, $src}"),
3339 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3341 _.ImmAllZerosV)))], _.ExeDomain>,
3342 EVEX, EVEX_KZ, Sched<[Sched.RR]>;
3344 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3345 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3346 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3347 !if(NoRMPattern, [],
3349 (_.VT (ld_frag addr:$src)))]),
3350 _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3351 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masking forms tie $src0 to $dst: unselected elements keep $src0.
3353 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3354 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3355 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3356 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3357 "${dst} {${mask}}, $src1}"),
3358 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3360 (_.VT _.RC:$src0))))], _.ExeDomain>,
3361 EVEX, EVEX_K, Sched<[Sched.RR]>;
3362 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3363 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3364 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3365 "${dst} {${mask}}, $src1}"),
3366 [(set _.RC:$dst, (_.VT
3367 (vselect _.KRCWM:$mask,
3368 (_.VT (ld_frag addr:$src1)),
3369 (_.VT _.RC:$src0))))], _.ExeDomain>,
3370 EVEX, EVEX_K, Sched<[Sched.RM]>;
3372 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3373 (ins _.KRCWM:$mask, _.MemOp:$src),
3374 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3375 "${dst} {${mask}} {z}, $src}",
3376 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3377 (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3378 _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// masked_load selection: undef and all-zeros passthrough both map to the
// zero-masking load; a register passthrough maps to the merge-masking load.
3380 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3381 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3383 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3384 (!cast<Instruction>(Name#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3386 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3387 (!cast<Instruction>(Name#_.ZSuffix##rmk) _.RC:$src0,
3388 _.KRCWM:$mask, addr:$ptr)>;
// Aligned-load instantiation across vector lengths: Z (512-bit) under prd
// alone; Z256/Z128 additionally require VLX. Uses the aligned load fragment
// and aligned masked_load for each width. The 512-bit form has no EVEX->VEX
// override (no VEX equivalent).
3391 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3392 AVX512VLVectorVTInfo _, Predicate prd,
3393 X86SchedWriteMoveLSWidths Sched,
3394 string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3395 let Predicates = [prd] in
3396 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3397 _.info512.AlignedLdFrag, masked_load_aligned512,
3398 Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3400 let Predicates = [prd, HasVLX] in {
3401 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3402 _.info256.AlignedLdFrag, masked_load_aligned256,
3403 Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3404 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3405 _.info128.AlignedLdFrag, masked_load_aligned128,
3406 Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Unaligned-load instantiation across vector lengths; mirrors
// avx512_alignedload_vl but with the unaligned LdFrag and
// masked_load_unaligned, and forwards SelectOprr to each width.
3410 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3411 AVX512VLVectorVTInfo _, Predicate prd,
3412 X86SchedWriteMoveLSWidths Sched,
3413 string EVEX2VEXOvrd, bit NoRMPattern = 0,
3414 SDPatternOperator SelectOprr = vselect> {
3415 let Predicates = [prd] in
3416 defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3417 masked_load_unaligned, Sched.ZMM, "",
3418 NoRMPattern, SelectOprr>, EVEX_V512;
3420 let Predicates = [prd, HasVLX] in {
3421 defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3422 masked_load_unaligned, Sched.YMM, EVEX2VEXOvrd#"Y",
3423 NoRMPattern, SelectOprr>, EVEX_V256;
3424 defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3425 masked_load_unaligned, Sched.XMM, EVEX2VEXOvrd,
3426 NoRMPattern, SelectOprr>, EVEX_V128;
// Defines one width of an AVX-512 vector store: disassembly-only reversed
// reg-reg encodings (rr_REV/rrk_REV/rrkz_REV, codegen-only forms used for
// unfolding via FoldGenData), the memory store (mr), the masked store (mrk),
// a masked_store (mstore) selection pattern, and ".s" assembler aliases that
// force the MRMDestReg encodings.
3430 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3431 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3432 X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3433 bit NoMRPattern = 0> {
3434 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3435 let isMoveReg = 1 in
3436 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3437 OpcodeStr # "\t{$src, $dst|$dst, $src}",
3438 [], _.ExeDomain>, EVEX,
3439 FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3440 EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3441 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3442 (ins _.KRCWM:$mask, _.RC:$src),
3443 OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3444 "${dst} {${mask}}, $src}",
3445 [], _.ExeDomain>, EVEX, EVEX_K,
3446 FoldGenData<BaseName#_.ZSuffix#rrk>,
3448 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3449 (ins _.KRCWM:$mask, _.RC:$src),
3450 OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3451 "${dst} {${mask}} {z}, $src}",
3452 [], _.ExeDomain>, EVEX, EVEX_KZ,
3453 FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Plain memory store; pattern suppressed when NoMRPattern is set.
3457 let hasSideEffects = 0, mayStore = 1 in
3458 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3459 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3460 !if(NoMRPattern, [],
3461 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3462 _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3463 EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3464 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3465 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3466 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3467 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// Select masked_store through the mrk form.
3470 def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3471 (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3472 _.KRCWM:$mask, _.RC:$src)>;
// "<mnemonic>.s" aliases: spell the store-direction (reversed) encodings.
3474 def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3475 (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3476 _.RC:$dst, _.RC:$src), 0>;
3477 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3478 (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3479 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3480 def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3481 (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3482 _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Unaligned-store instantiation across vector lengths: Z under prd, Z256/Z128
// under prd + VLX; uses the generic `store` fragment and
// masked_store_unaligned for all widths.
3485 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3486 AVX512VLVectorVTInfo _, Predicate prd,
3487 X86SchedWriteMoveLSWidths Sched,
3488 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3489 let Predicates = [prd] in
3490 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3491 masked_store_unaligned, Sched.ZMM, "",
3492 NoMRPattern>, EVEX_V512;
3493 let Predicates = [prd, HasVLX] in {
3494 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3495 masked_store_unaligned, Sched.YMM,
3496 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3497 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3498 masked_store_unaligned, Sched.XMM, EVEX2VEXOvrd,
3499 NoMRPattern>, EVEX_V128;
// Aligned-store instantiation across vector lengths; same structure as
// avx512_store_vl but with `alignedstore` and the per-width aligned masked
// store fragments.
3503 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3504 AVX512VLVectorVTInfo _, Predicate prd,
3505 X86SchedWriteMoveLSWidths Sched,
3506 string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3507 let Predicates = [prd] in
3508 defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3509 masked_store_aligned512, Sched.ZMM, "",
3510 NoMRPattern>, EVEX_V512;
3512 let Predicates = [prd, HasVLX] in {
3513 defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3514 masked_store_aligned256, Sched.YMM,
3515 EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3516 defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3517 masked_store_aligned128, Sched.XMM, EVEX2VEXOvrd,
3518 NoMRPattern>, EVEX_V128;
// Concrete move instructions. Each defm combines a load multiclass and the
// matching store multiclass under one name. VMOVUPS/VMOVUPD/VMOVDQU32 pass
// null_frag as SelectOprr so their reg-reg select patterns don't compete with
// other instructions; the VMOVDQU8/16 variants need BWI; the "1" arguments
// suppress the plain rm/mr patterns for the integer types whose loads/stores
// are selected through the 64-bit-element patterns further below.
3522 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3523 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3524 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3525 HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3526 PS, EVEX_CD8<32, CD8VF>;
3528 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3529 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3530 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3531 HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3532 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3534 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3535 SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3536 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3537 SchedWriteFMoveLS, "VMOVUPS">,
3538 PS, EVEX_CD8<32, CD8VF>;
3540 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3541 SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3542 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3543 SchedWriteFMoveLS, "VMOVUPD">,
3544 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3546 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3547 HasAVX512, SchedWriteVecMoveLS,
3549 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3550 HasAVX512, SchedWriteVecMoveLS,
3552 PD, EVEX_CD8<32, CD8VF>;
3554 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3555 HasAVX512, SchedWriteVecMoveLS,
3557 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3558 HasAVX512, SchedWriteVecMoveLS,
3560 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3562 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3563 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3564 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3565 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3566 XD, EVEX_CD8<8, CD8VF>;
3568 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3569 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3570 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3571 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3572 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3574 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3575 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3576 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3577 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3578 XS, EVEX_CD8<32, CD8VF>;
3580 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3581 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3582 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3583 SchedWriteVecMoveLS, "VMOVDQU">,
3584 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3586 // Special instructions to help with spilling when we don't have VLX. We need
3587 // to load or store from a ZMM register instead. These are converted in
3588 // expandPostRAPseudos.
// Load pseudos: side-effect free and rematerializable so the allocator can
// rematerialize instead of reloading.
3589 let isReMaterializable = 1, canFoldAsLoad = 1,
3590 isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3591 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3592 "", []>, Sched<[WriteFLoadX]>;
3593 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3594 "", []>, Sched<[WriteFLoadY]>;
3595 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3596 "", []>, Sched<[WriteFLoadX]>;
3597 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3598 "", []>, Sched<[WriteFLoadY]>;
// Matching store pseudos.
3601 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3602 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3603 "", []>, Sched<[WriteFStoreX]>;
3604 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3605 "", []>, Sched<[WriteFStoreY]>;
3606 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3607 "", []>, Sched<[WriteFStoreX]>;
3608 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3609 "", []>, Sched<[WriteFStoreY]>;
// A vselect that picks zeros when the mask is SET would need an inverted
// zero-masking move, so these patterns insert a KNOTW on the mask and use the
// ordinary zero-masking VMOVDQA Zrrkz. The v8i64 case also widens the VK8
// mask through VK16 for the KNOTW.
3612 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3613 (v8i64 VR512:$src))),
3614 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3617 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3618 (v16i32 VR512:$src))),
3619 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3621 // These patterns exist to prevent the above patterns from introducing a second
3622 // mask inversion when one already exists.
// Mask already XORed with all-ones (i.e. already inverted): use it directly.
3623 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3624 (bc_v8i64 (v16i32 immAllZerosV)),
3625 (v8i64 VR512:$src))),
3626 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3627 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3628 (v16i32 immAllZerosV),
3629 (v16i32 VR512:$src))),
3630 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lowers a narrow masked select (blend) without VLX: widen both vector
// operands into an IMPLICIT_DEF 512-bit register, move the narrow mask into
// the wide writemask class, and use the wide merge-masking (rrk) or
// zero-masking (rrkz) move.
3632 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3633 X86VectorVTInfo Wide> {
3634 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3635 Narrow.RC:$src1, Narrow.RC:$src0)),
3638 (!cast<Instruction>(InstrStr#"rrk")
3639 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3640 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3641 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zero-passthrough variant selects the zero-masking form.
3644 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3645 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3648 (!cast<Instruction>(InstrStr#"rrkz")
3649 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3650 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3654 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3655 // available. Use a 512-bit operation and extract.
// 32/64-bit element types only need AVX512; byte/word element types need the
// BWI 512-bit masked moves.
3656 let Predicates = [HasAVX512, NoVLX] in {
3657 defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3658 defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3659 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3660 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3662 defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3663 defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3664 defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3665 defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
3668 let Predicates = [HasBWI, NoVLX] in {
3669 defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3670 defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3672 defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3673 defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// 512-bit integer loads/stores of any element type are selected through the
// 64-bit-element move (VMOVDQA64Z/VMOVDQU64Z) — the element size is
// irrelevant for a whole-register move, and this is why the narrower-element
// defms above suppress their own rm/mr patterns.
3676 let Predicates = [HasAVX512] in {
3678 def : Pat<(alignedloadv16i32 addr:$src),
3679 (VMOVDQA64Zrm addr:$src)>;
3680 def : Pat<(alignedloadv32i16 addr:$src),
3681 (VMOVDQA64Zrm addr:$src)>;
3682 def : Pat<(alignedloadv64i8 addr:$src),
3683 (VMOVDQA64Zrm addr:$src)>;
3684 def : Pat<(loadv16i32 addr:$src),
3685 (VMOVDQU64Zrm addr:$src)>;
3686 def : Pat<(loadv32i16 addr:$src),
3687 (VMOVDQU64Zrm addr:$src)>;
3688 def : Pat<(loadv64i8 addr:$src),
3689 (VMOVDQU64Zrm addr:$src)>;
3692 def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3693 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3694 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3695 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3696 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3697 (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3698 def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3699 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3700 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3701 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3702 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3703 (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// Same element-size-agnostic selection for the VLX 128- and 256-bit forms:
// all integer loads/stores go through VMOVDQA64/VMOVDQU64 Z128/Z256.
3706 let Predicates = [HasVLX] in {
3708 def : Pat<(alignedloadv4i32 addr:$src),
3709 (VMOVDQA64Z128rm addr:$src)>;
3710 def : Pat<(alignedloadv8i16 addr:$src),
3711 (VMOVDQA64Z128rm addr:$src)>;
3712 def : Pat<(alignedloadv16i8 addr:$src),
3713 (VMOVDQA64Z128rm addr:$src)>;
3714 def : Pat<(loadv4i32 addr:$src),
3715 (VMOVDQU64Z128rm addr:$src)>;
3716 def : Pat<(loadv8i16 addr:$src),
3717 (VMOVDQU64Z128rm addr:$src)>;
3718 def : Pat<(loadv16i8 addr:$src),
3719 (VMOVDQU64Z128rm addr:$src)>;
3722 def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3723 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3724 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3725 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3726 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3727 (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3728 def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3729 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3730 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3731 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3732 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3733 (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
// 256-bit equivalents.
3736 def : Pat<(alignedloadv8i32 addr:$src),
3737 (VMOVDQA64Z256rm addr:$src)>;
3738 def : Pat<(alignedloadv16i16 addr:$src),
3739 (VMOVDQA64Z256rm addr:$src)>;
3740 def : Pat<(alignedloadv32i8 addr:$src),
3741 (VMOVDQA64Z256rm addr:$src)>;
3742 def : Pat<(loadv8i32 addr:$src),
3743 (VMOVDQU64Z256rm addr:$src)>;
3744 def : Pat<(loadv16i16 addr:$src),
3745 (VMOVDQU64Z256rm addr:$src)>;
3746 def : Pat<(loadv32i8 addr:$src),
3747 (VMOVDQU64Z256rm addr:$src)>;
3750 def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3751 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3752 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3753 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3754 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3755 (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3756 def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3757 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3758 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3759 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3760 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3761 (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
// Folds "extract low subvector, then masked select" into a single masked
// move: the extract becomes an EXTRACT_SUBREG and the select becomes the
// merge-masking (rrk) or zero-masking (rrkz) move. Cast describes the type
// the surrounding vselect operates on (may differ in element size from To).
3764 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3765 X86VectorVTInfo To, X86VectorVTInfo Cast> {
3766 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3768 (To.VT (extract_subvector
3769 (From.VT From.RC:$src), (iPTR 0)))),
3771 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3772 Cast.RC:$src0, Cast.KRCWM:$mask,
3773 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
3775 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3777 (To.VT (extract_subvector
3778 (From.VT From.RC:$src), (iPTR 0)))),
3779 Cast.ImmAllZerosV)),
3780 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3782 (To.VT (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx))))>;
// Instantiations: masked extracts of the low 128 bits of 256/512-bit vectors
// and the low 256 bits of 512-bit vectors, for both element-size flavors of
// the integer moves (DQA64/DQA32) and both fp flavors (APD/APS).
3786 let Predicates = [HasVLX] in {
3787 // A masked extract from the first 128-bits of a 256-bit vector can be
3788 // implemented with masked move.
3789 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
3790 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
3791 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3792 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
3793 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
3794 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
3795 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3796 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
3797 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
3798 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
3799 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
3800 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
3802 // A masked extract from the first 128-bits of a 512-bit vector can be
3803 // implemented with masked move.
3804 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
3805 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3806 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3807 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
3808 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
3809 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3810 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3811 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
3812 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
3813 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
3814 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
3815 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
3817 // A masked extract from the first 256-bits of a 512-bit vector can be
3818 // implemented with masked move.
3819 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
3820 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
3821 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3822 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
3823 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
3824 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
3825 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3826 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
3827 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
3828 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
3829 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
3830 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
3833 // Move Int Doubleword to Packed Double Int
3835 let ExeDomain = SSEPackedInt in {
// vmovd GR32 -> low element of an xmm (v4i32 scalar_to_vector).
3836 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3837                       "vmovd\t{$src, $dst|$dst, $src}",
3839                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3840                         EVEX, Sched<[WriteVecMoveFromGpr]>;
// vmovd i32 load -> low element of an xmm.  EVEX_CD8<32, CD8VT1> selects
// the disp8*4 compressed-displacement scale for the memory operand.
3841 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3842                       "vmovd\t{$src, $dst|$dst, $src}",
3844                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3845                         EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
// vmovq GR64 -> low element of an xmm (v2i64 scalar_to_vector); VEX_W
// selects the 64-bit form of opcode 0x6E.
3846 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3847                       "vmovq\t{$src, $dst|$dst, $src}",
3849                         (v2i64 (scalar_to_vector GR64:$src)))]>,
3850                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Load form kept only so the disassembler can decode it (no pattern,
// isCodeGenOnly + ForceDisassemble); isel uses VMOVQI2PQIZrm instead.
3851 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3852 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3854                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3855                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
// FR64X <-> GR64 bitcast forms; codegen-only because the FR64X register
// class aliases VR128X and these share encodings with the vector forms.
3856 let isCodeGenOnly = 1 in {
3857 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3858                       "vmovq\t{$src, $dst|$dst, $src}",
3859                       [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3860                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// NOTE(review): EVEX_CD8<8, CD8VT8> gives the same disp8*8 scale as
// EVEX_CD8<64, CD8VT1>; functionally equivalent, just written oddly.
3861 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3862                       "vmovq\t{$src, $dst|$dst, $src}",
3863                       [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3864                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3865 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3866                       "vmovq\t{$src, $dst|$dst, $src}",
3867                       [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3868                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3869 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3870                       "vmovq\t{$src, $dst|$dst, $src}",
3871                       [(store (i64 (bitconvert FR64X:$src)), addr:$dst)]>,
3872                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3873                       EVEX_CD8<64, CD8VT1>;
3875 } // ExeDomain = SSEPackedInt
3877 // Move Int Doubleword to Single Scalar
3879 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// GR32/i32-load -> FR32X bitcast forms (scalar view of vmovd).
3880 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3881                       "vmovd\t{$src, $dst|$dst, $src}",
3882                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3883                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3885 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3886                       "vmovd\t{$src, $dst|$dst, $src}",
3887                       [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))]>,
3888                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3889 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3891 // Move doubleword from xmm register to r/m32
3893 let ExeDomain = SSEPackedInt in {
// vmovd element 0 of a v4i32 -> GR32 / i32 store.
3894 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3895                       "vmovd\t{$src, $dst|$dst, $src}",
3896                       [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3898                       EVEX, Sched<[WriteVecMoveToGpr]>;
3899 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3900                       (ins i32mem:$dst, VR128X:$src),
3901                       "vmovd\t{$src, $dst|$dst, $src}",
3902                       [(store (i32 (extractelt (v4i32 VR128X:$src),
3903                                    (iPTR 0))), addr:$dst)]>,
3904                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3905 } // ExeDomain = SSEPackedInt
3907 // Move quadword from xmm1 register to r/m64
3909 let ExeDomain = SSEPackedInt in {
// vmovq element 0 of a v2i64 -> GR64.
3910 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3911                       "vmovq\t{$src, $dst|$dst, $src}",
3912                       [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3914                       PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3915                       Requires<[HasAVX512]>;
3917 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
// Disassembler-only store form of the 0x7E encoding; isel uses the 0xD6
// VMOVPQI2QIZmr below for stores.
3918 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3919                       "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3920                       EVEX, VEX_W, Sched<[WriteVecStore]>,
3921                       Requires<[HasAVX512, In64BitMode]>;
3923 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3924                       (ins i64mem:$dst, VR128X:$src),
3925                       "vmovq\t{$src, $dst|$dst, $src}",
3926                       [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3928                       EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3929                       Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Reg-reg form with the store (0xD6) encoding; only reachable via the
// "vmovq.s" alias below / FoldGenData-style remapping.
3931 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3932 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3934                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3935                       EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3936 } // ExeDomain = SSEPackedInt
3938 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3939                 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
3941 // Move Scalar Single to Double Int
3943 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
// FR32X -> GR32 / i32-store bitcast forms.
3944 def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3946                       "vmovd\t{$src, $dst|$dst, $src}",
3947                       [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3948                       EVEX, Sched<[WriteVecMoveToGpr]>;
3949 def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3950                       (ins i32mem:$dst, FR32X:$src),
3951                       "vmovd\t{$src, $dst|$dst, $src}",
3952                       [(store (i32 (bitconvert FR32X:$src)), addr:$dst)]>,
3953                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3954 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3956 // Move Quadword Int to Packed Quadword Int
3958 let ExeDomain = SSEPackedInt in {
// vmovq i64 load -> low element of an xmm (XS-prefixed 0x7E form).
// NOTE(review): EVEX_CD8<8, CD8VT8> == disp8*8, same scale as
// EVEX_CD8<64, CD8VT1>.
3959 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3961                       "vmovq\t{$src, $dst|$dst, $src}",
3963                         (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3964                       EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3965 } // ExeDomain = SSEPackedInt
3967 // Allow "vmovd" but print "vmovq".
3968 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3969                 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3970 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3971                 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3973 //===----------------------------------------------------------------------===//
3974 // AVX-512 MOVSS, MOVSD
3975 //===----------------------------------------------------------------------===//
// Defines the full family of scalar-move forms for one element type:
// reg-reg (unmasked / merge-masked rrk / zero-masked rrkz), scalar load
// (rm / rmk / rmkz) and scalar store (mr / masked mrk).
3977 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3978                               X86VectorVTInfo _> {
// Unmasked reg-reg form is only used under OptForSize; elsewhere codegen
// prefers blends (see the OptForSpeed patterns later in this file).
3979   let Predicates = [HasAVX512, OptForSize] in
3980   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3981              (ins _.RC:$src1, _.RC:$src2),
3982              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3983              [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3984              _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Zero-masked: result element selected by $mask, else zero.
3985   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3986               (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3987               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3988                          "$dst {${mask}} {z}, $src1, $src2}"),
3989               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3990                                       (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3992               _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masked: $src0 is tied to $dst and supplies the pass-through value.
3993   let Constraints = "$src0 = $dst"  in
3994   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3995              (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3996              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3997                         "$dst {${mask}}, $src1, $src2}"),
3998              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3999                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4000                                      (_.VT _.RC:$src0))))],
4001              _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Plain scalar load into an FRC register; rematerializable load-fold
// candidate.
4002   let canFoldAsLoad = 1, isReMaterializable = 1 in
4003   def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
4004              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4005              [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
4006              _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked load forms carry no patterns (selected via the lowering
// multiclasses below); declared mayLoad/no-side-effects explicitly.
4007   let mayLoad = 1, hasSideEffects = 0 in {
4008   let Constraints = "$src0 = $dst" in
4009   def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4010               (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
4011               !strconcat(asm, "\t{$src, $dst {${mask}}|",
4012                          "$dst {${mask}}, $src}"),
4013               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
4014   def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
4015               (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
4016               !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
4017                          "$dst {${mask}} {z}, $src}"),
4018               [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Scalar store (opcode 0x11) and its masked counterpart.
4020   def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
4021              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
4022              [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
4023              EVEX, Sched<[WriteFStore]>;
4024   let mayStore = 1, hasSideEffects = 0 in
4025   def mrk: AVX512PI<0x11, MRMDestMem, (outs),
4026               (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
4027               !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
4028               [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiations: f32 (XS prefix) and f64 (XD + VEX_W); VEX_LIG ignores
// the EVEX vector-length bits for these scalar ops.
4032 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
4033                                   VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
4035 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
4036                                   VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Lower (OpNode src0, scalar_to_vector(select mask, s1, s2/0)) to the
// merge-masked (rrk) or zero-masked (rrkz) scalar move defined above.
4039 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4040                                        PatLeaf ZeroFP, X86VectorVTInfo _> {
4042 def : Pat<(_.VT (OpNode _.RC:$src0,
4043                  (_.VT (scalar_to_vector
4044                           (_.EltVT (X86selects VK1WM:$mask,
4045                                                (_.EltVT _.FRC:$src1),
4046                                                (_.EltVT _.FRC:$src2))))))),
4047           (!cast<Instruction>(InstrStr#rrk)
4048                         (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4051                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Same, but the false value is +0.0 -> use the zero-masking form.
4053 def : Pat<(_.VT (OpNode _.RC:$src0,
4054                  (_.VT (scalar_to_vector
4055                           (_.EltVT (X86selects VK1WM:$mask,
4056                                                (_.EltVT _.FRC:$src1),
4057                                                (_.EltVT ZeroFP))))))),
4058           (!cast<Instruction>(InstrStr#rrkz)
4061                         (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Select a masked scalar store (mrk) for a masked_store of a 128-bit
// vector that was widened (insert_subvector into undef 512-bit).
// MaskRC:$mask already has mask-register class; just re-class it to VK1WM.
4064 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4065                                         dag Mask, RegisterClass MaskRC> {
4067 def : Pat<(masked_store
4068              (_.info512.VT (insert_subvector undef,
4069                                (_.info128.VT _.info128.RC:$src),
4070                                (iPTR 0))), addr:$dst, Mask),
4071           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4072                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4073                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Same as above, but the mask arrives in a GPR subclass and must first be
// widened to i32 with INSERT_SUBREG before the VK1WM re-class.
4077 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4078                                                AVX512VLVectorVTInfo _,
4079                                                dag Mask, RegisterClass MaskRC,
4080                                                SubRegIndex subreg> {
4082 def : Pat<(masked_store
4083              (_.info512.VT (insert_subvector undef,
4084                                (_.info128.VT _.info128.RC:$src),
4085                                (iPTR 0))), addr:$dst, Mask),
4086           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4087                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4088                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4092 // This matches the more recent codegen from clang that avoids emitting a 512
4093 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4094 // bits on AVX512F only targets.
4095 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4096                                                AVX512VLVectorVTInfo _,
4097                                                dag Mask512, dag Mask128,
4098                                                RegisterClass MaskRC,
4099                                                SubRegIndex subreg> {
// AVX512F-only form: the masked store was widened to 512 bits (Mask512).
4102 def : Pat<(masked_store
4103              (_.info512.VT (insert_subvector undef,
4104                                (_.info128.VT _.info128.RC:$src),
4105                                (iPTR 0))), addr:$dst, Mask512),
4106           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4107                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4108                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
4110 // AVX512VL pattern.
4111 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4112           (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4113                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4114                       (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Select masked scalar loads: zero pass-through -> rmkz, X86vzmovl of the
// destination register as pass-through -> merge form rmk.
4117 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4118                                        dag Mask, RegisterClass MaskRC> {
4120 def : Pat<(_.info128.VT (extract_subvector
4121                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4122                                         (_.info512.VT (bitconvert
4123                                                        (v16i32 immAllZerosV))))),
4125           (!cast<Instruction>(InstrStr#rmkz)
4126                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4129 def : Pat<(_.info128.VT (extract_subvector
4130                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4131                       (_.info512.VT (insert_subvector undef,
4132                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4135           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4136                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// GPR-subclass-mask variant of the load lowering (mask widened through
// INSERT_SUBREG, as in the store case above).
4141 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4142                                               AVX512VLVectorVTInfo _,
4143                                               dag Mask, RegisterClass MaskRC,
4144                                               SubRegIndex subreg> {
4146 def : Pat<(_.info128.VT (extract_subvector
4147                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
4148                                         (_.info512.VT (bitconvert
4149                                                        (v16i32 immAllZerosV))))),
4151           (!cast<Instruction>(InstrStr#rmkz)
4152                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4155 def : Pat<(_.info128.VT (extract_subvector
4156                 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4157                       (_.info512.VT (insert_subvector undef,
4158                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4161           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4162                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4167 // This matches the more recent codegen from clang that avoids emitting a 512
4168 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4169 // bits on AVX512F only targets.
4170 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4171                                               AVX512VLVectorVTInfo _,
4172                                               dag Mask512, dag Mask128,
4173                                               RegisterClass MaskRC,
4174                                               SubRegIndex subreg> {
4175 // AVX512F patterns.
4176 def : Pat<(_.info128.VT (extract_subvector
4177                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4178                                         (_.info512.VT (bitconvert
4179                                                        (v16i32 immAllZerosV))))),
4181           (!cast<Instruction>(InstrStr#rmkz)
4182                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4185 def : Pat<(_.info128.VT (extract_subvector
4186                 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4187                       (_.info512.VT (insert_subvector undef,
4188                             (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4191           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4192                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4195 // AVX512Vl patterns.
4196 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4197                          (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
4198           (!cast<Instruction>(InstrStr#rmkz)
4199                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4202 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4203                          (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4204           (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4205                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar-lowering multiclasses for VMOVSS/VMOVSD.  The
// Mask dags below describe how the front end materialized the one-bit
// mask: trunc/and from a GR32, an and of a GR16/GR8, or an insert/extract
// dance through v16i1/v8i1 (the subreg2 variants).
4209 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4210 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4212 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4213                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4214 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4215                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4216 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4217                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4219 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4220                    (v16i1 (insert_subvector
4221                            (v16i1 immAllZerosV),
4222                            (v4i1 (extract_subvector
4223                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4226                    (v4i1 (extract_subvector
4227                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4228                           (iPTR 0))), GR8, sub_8bit>;
4229 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4234                            (v16i1 immAllZerosV),
4235                            (v2i1 (extract_subvector
4236                                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4240                    (v2i1 (extract_subvector
4241                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4242                           (iPTR 0))), GR8, sub_8bit>;
4244 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4245                    (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4246 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4247                    (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4248 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4249                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4251 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4252                    (v16i1 (insert_subvector
4253                            (v16i1 immAllZerosV),
4254                            (v4i1 (extract_subvector
4255                                   (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4258                    (v4i1 (extract_subvector
4259                           (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4260                           (iPTR 0))), GR8, sub_8bit>;
4261 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4266                            (v16i1 immAllZerosV),
4267                            (v2i1 (extract_subvector
4268                                   (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4272                    (v2i1 (extract_subvector
4273                           (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4274                           (iPTR 0))), GR8, sub_8bit>;
// Scalar f32/f64 masked select -> masked VMOVSS/VMOVSD through VR128X,
// with IMPLICIT_DEF as the unused second source.
4276 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4277           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4278            (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4279            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4280            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4282 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4283           (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4284            (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4286 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4287           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4288            (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4289            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4290            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4292 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fpimm0)),
4293           (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4294            (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
// "_REV" reg-reg forms use the store encoding (opcode 0x11, MRMDestReg).
// They carry no patterns; FoldGenData lets the unfolding/assembly code
// map them back to the canonical 0x10 forms, and the "vmovss.s"/
// "vmovsd.s" aliases below force this encoding from assembly.
4296 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4297 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4298                            (ins VR128X:$src1, VR128X:$src2),
4299                            "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4300                            []>, XS, EVEX_4V, VEX_LIG,
4301                            FoldGenData<"VMOVSSZrr">,
4302                            Sched<[SchedWriteFShuffle.XMM]>;
4304 let Constraints = "$src0 = $dst" in
4305 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4306                            (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4307                                                    VR128X:$src1, VR128X:$src2),
4308                            "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4309                               "$dst {${mask}}, $src1, $src2}",
4310                            []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4311                            FoldGenData<"VMOVSSZrrk">,
4312                            Sched<[SchedWriteFShuffle.XMM]>;
4314 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4315                          (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4316                          "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4317                             "$dst {${mask}} {z}, $src1, $src2}",
4318                          []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4319                          FoldGenData<"VMOVSSZrrkz">,
4320                          Sched<[SchedWriteFShuffle.XMM]>;
4322 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4323                            (ins VR128X:$src1, VR128X:$src2),
4324                            "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4325                            []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4326                            FoldGenData<"VMOVSDZrr">,
4327                            Sched<[SchedWriteFShuffle.XMM]>;
4329 let Constraints = "$src0 = $dst" in
4330 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4331                            (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4332                                                    VR128X:$src1, VR128X:$src2),
4333                            "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4334                               "$dst {${mask}}, $src1, $src2}",
4335                            []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4336                            VEX_W, FoldGenData<"VMOVSDZrrk">,
4337                            Sched<[SchedWriteFShuffle.XMM]>;
4339 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4340                             (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4342                             "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4343                                "$dst {${mask}} {z}, $src1, $src2}",
4344                             []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4345                             VEX_W, FoldGenData<"VMOVSDZrrkz">,
4346                             Sched<[SchedWriteFShuffle.XMM]>;
// ".s" assembler aliases selecting the store-encoded (_REV) forms.
4349 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4350                 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4351 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4352                    "$dst {${mask}}, $src1, $src2}",
4353                 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4354                                 VR128X:$src1, VR128X:$src2), 0>;
4355 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4356                    "$dst {${mask}} {z}, $src1, $src2}",
4357                 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4358                                  VR128X:$src1, VR128X:$src2), 0>;
4359 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4360                 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4361 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4362                    "$dst {${mask}}, $src1, $src2}",
4363                 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4364                                 VR128X:$src1, VR128X:$src2), 0>;
4365 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4366                    "$dst {${mask}} {z}, $src1, $src2}",
4367                 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4368                                  VR128X:$src1, VR128X:$src2), 0>;
// X86vzmovl (move low element, zero upper elements) lowering.  Under
// OptForSize use VMOVSS/VMOVSD against a zeroed register; wider types go
// through EXTRACT_SUBREG/SUBREG_TO_REG on the low xmm.
4370 let Predicates = [HasAVX512, OptForSize] in {
4371   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4372             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4373   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4374             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4376   // Move low f32 and clear high bits.
4377   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4378             (SUBREG_TO_REG (i32 0),
4379              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4380               (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4381   def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4382             (SUBREG_TO_REG (i32 0),
4383              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4384               (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4386   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4387             (SUBREG_TO_REG (i32 0),
4388              (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4389               (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))), sub_xmm)>;
4390   def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4391             (SUBREG_TO_REG (i32 0),
4392              (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4393               (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))), sub_xmm)>;
4395   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4396             (SUBREG_TO_REG (i32 0),
4397              (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4398               (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4399   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4400             (SUBREG_TO_REG (i32 0),
4401              (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4402               (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4404   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4405             (SUBREG_TO_REG (i32 0),
4406              (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4407               (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))), sub_xmm)>;
4409   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4410             (SUBREG_TO_REG (i32 0),
4411              (v2i64 (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4412               (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))), sub_xmm)>;
4416 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4417 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// The blend immediates select which lanes come from $src: 1 = one f32/f64
// lane, 3 = two i16 lanes (one i32), 0xf = four i16 lanes (one i64).
4418 let Predicates = [HasAVX512, OptForSpeed] in {
4419   def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4420             (SUBREG_TO_REG (i32 0),
4421              (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4422                           (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4423                           (i8 1))), sub_xmm)>;
4424   def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4425             (SUBREG_TO_REG (i32 0),
4426              (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4427                           (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4428                           (i8 3))), sub_xmm)>;
4430   def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4431             (SUBREG_TO_REG (i32 0),
4432              (v2f64 (VBLENDPDrri (v2f64 (V_SET0)),
4433                           (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)),
4434                           (i8 1))), sub_xmm)>;
4435   def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4436             (SUBREG_TO_REG (i32 0),
4437              (v2i64 (VPBLENDWrri (v2i64 (V_SET0)),
4438                           (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)),
4439                           (i8 0xf))), sub_xmm)>;
// Zero-extending scalar load patterns: VMOVSS/VMOVSD/VMOVD/VMOVQ loads
// clear the upper elements, so vzmovl/vzload of a scalar load folds into
// the plain load; wider result types wrap it in SUBREG_TO_REG.
4442 let Predicates = [HasAVX512] in {
4444   // MOVSSrm zeros the high parts of the register; represent this
4445   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4446   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
4447             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4448   def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
4449             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4450   def : Pat<(v4f32 (X86vzload addr:$src)),
4451             (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4453   // MOVSDrm zeros the high parts of the register; represent this
4454   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4455   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4456             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4457   def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4458             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4459   def : Pat<(v2f64 (X86vzload addr:$src)),
4460             (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4462   // Represent the same patterns above but in the form they appear for
// (256-bit result types.)
4464   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4465                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4466             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4467   def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4468                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4469             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4470   def : Pat<(v8f32 (X86vzload addr:$src)),
4471             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4472   def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4473                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4474             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4475   def : Pat<(v4f64 (X86vzload addr:$src)),
4476             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4478   // Represent the same patterns above but in the form they appear for
// (512-bit result types.)
4480   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4481                    (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4482             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4483   def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4484                    (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4485             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4486   def : Pat<(v16f32 (X86vzload addr:$src)),
4487             (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4488   def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4489                    (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4490             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4491   def : Pat<(v8f64 (X86vzload addr:$src)),
4492             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4494   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4495                    (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4496             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4498   // Extract and store.
4499   def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4501             (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
// vmovq xmm->xmm: moves the low i64 and zeroes the upper element
// (X86vzmovl on v2i64).
4504 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4505 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4507                                  "vmovq\t{$src, $dst|$dst, $src}",
4508                                  [(set VR128X:$dst, (v2i64 (X86vzmovl
4509                                                     (v2i64 VR128X:$src))))]>,
// vzmovl/vzload patterns for integer types, mapping to the GPR-move and
// load instructions defined earlier in this section.
4513 let Predicates = [HasAVX512] in {
4514   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4515             (VMOVDI2PDIZrr GR32:$src)>;
4517   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4518             (VMOV64toPQIZrr GR64:$src)>;
4520   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4521                                (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4522             (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
4524   def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4525                                (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4526             (SUBREG_TO_REG (i64 0), (v2i64 (VMOV64toPQIZrr GR64:$src)), sub_xmm)>;
4528   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4529   def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4530             (VMOVDI2PDIZrm addr:$src)>;
4531   def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4532             (VMOVDI2PDIZrm addr:$src)>;
4533   def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
4534             (VMOVDI2PDIZrm addr:$src)>;
4535   def : Pat<(v4i32 (X86vzload addr:$src)),
4536             (VMOVDI2PDIZrm addr:$src)>;
4537   def : Pat<(v8i32 (X86vzload addr:$src)),
4538             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4539   def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4540             (VMOVQI2PQIZrm addr:$src)>;
4541   def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4542             (VMOVZPQILo2PQIZrr VR128X:$src)>;
4543   def : Pat<(v2i64 (X86vzload addr:$src)),
4544             (VMOVQI2PQIZrm addr:$src)>;
4545   def : Pat<(v4i64 (X86vzload addr:$src)),
4546             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4548   // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4549   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4550                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4551             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
4552   def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4553                                (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4554             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrr GR32:$src)), sub_xmm)>;
4556   // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4557   def : Pat<(v16i32 (X86vzload addr:$src)),
4558             (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4559   def : Pat<(v8i64 (X86vzload addr:$src)),
4560             (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4563 //===----------------------------------------------------------------------===//
4564 // AVX-512 - Non-temporals
4565 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads (vmovntdqa).  Patterns are supplied
// separately below so BW-only element types can also reuse these defs.
4567 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4568                       (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4569                       [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4570                       EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
4572 let Predicates = [HasVLX] in {
4573   def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4575                          "vmovntdqa\t{$src, $dst|$dst, $src}",
4576                          [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4577                          EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4579   def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4581                          "vmovntdqa\t{$src, $dst|$dst, $src}",
4582                          [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4583                          EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction for a given vector type; high
// AddedComplexity so these beat the ordinary aligned-store patterns.
4586 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4587                         X86SchedWriteMoveLS Sched,
4588                         PatFrag st_frag = alignednontemporalstore> {
4589   let SchedRW = [Sched.MR], AddedComplexity = 400 in
4590   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4591                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4592                     [(st_frag (_.VT _.RC:$src), addr:$dst)],
4593                     _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// 512-bit form on plain AVX512F; 256/128-bit forms require VLX.
4596 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4597                            AVX512VLVectorVTInfo VTInfo,
4598                            X86SchedWriteMoveLSWidths Sched> {
4599   let Predicates = [HasAVX512] in
4600     defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4602   let Predicates = [HasAVX512, HasVLX] in {
4603     defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4604     defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
4608 defm VMOVNTDQ  : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4609                                 SchedWriteVecMoveLSNT>, PD;
4610 defm VMOVNTPD  : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4611                                 SchedWriteFMoveLSNT>, PD, VEX_W;
4612 defm VMOVNTPS  : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4613                                 SchedWriteFMoveLSNT>, PS;
// Select 512-bit non-temporal stores of other element types through the
// i64-typed VMOVNTDQZmr, and all 512-bit non-temporal loads through
// VMOVNTDQAZrm. High AddedComplexity keeps these ahead of plain mov patterns.
let Predicates = [HasAVX512], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;
  def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
            (VMOVNTDQZmr addr:$dst, VR512:$src)>;

  def : Pat<(v8f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v8i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v16i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v32i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
  def : Pat<(v64i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZrm addr:$src)>;
}
// Same non-temporal load/store selection for the 256/128-bit (VLX) forms.
let Predicates = [HasVLX], AddedComplexity = 400 in {
  def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
  def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
            (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;

  def : Pat<(v4f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v4i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v8i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v16i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;
  def : Pat<(v32i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ256rm addr:$src)>;

  def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
  def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
            (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;

  def : Pat<(v2f64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4f32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v2i64 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v4i32 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v8i16 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
  def : Pat<(v16i8 (alignednontemporalload addr:$src)),
            (VMOVNTDQAZ128rm addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//===----------------------------------------------------------------------===//
// Masked integer binop: register-register and register-memory forms.
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           X86VectorVTInfo _, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                            IsCommutable>, AVX512BIBase, EVEX_4V,
                            Sched<[sched]>;

  defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                            (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Adds the broadcast-memory (EVEX.b) form on top of avx512_binop_rm.
multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _, X86FoldableSchedWrite sched,
                            bit IsCommutable = 0> :
           avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                             (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
                             "${src2}"##_.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_.BroadcastStr,
                             (_.VT (OpNode _.RC:$src1,
                                           (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiate a binop at 512/256/128 bits; the narrow forms need VLX.
multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                              AVX512VLVectorVTInfo VTInfo,
                              X86SchedWriteWidths sched, Predicate prd,
                              bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                             IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
                                sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
                                sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// As avx512_binop_rm_vl, but with the broadcast-memory form included.
multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               AVX512VLVectorVTInfo VTInfo,
                               X86SchedWriteWidths sched, Predicate prd,
                               bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
                              IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
                                 sched.YMM, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
                                 sched.XMM, IsCommutable>, EVEX_V128;
  }
}
// i64-element binop (broadcast supported); VEX.W selects the quadword form.
multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
                                  sched, prd, IsCommutable>,
                                  VEX_W, EVEX_CD8<64, CD8VF>;
}
// i32-element binop (broadcast supported).
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
                                  sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
}
// i16-element binop; no broadcast form exists for word elements.
multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
                                 VEX_WIG;
}
// i8-element binop; no broadcast form exists for byte elements.
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                X86SchedWriteWidths sched, Predicate prd,
                                bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
                                 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
                                 VEX_WIG;
}
// Instantiate both dword and qword element sizes of a binop.
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
                                IsCommutable>;

  defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
                                IsCommutable>;
}
// Instantiate both byte and word element sizes of a binop.
multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
                                 SDNode OpNode, X86SchedWriteWidths sched,
                                 Predicate prd, bit IsCommutable = 0> {
  defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
                                IsCommutable>;

  defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
                                IsCommutable>;
}
// Instantiate all four element sizes; b/w variants additionally require BWI.
multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
                                  bits<8> opc_d, bits<8> opc_q,
                                  string OpcodeStr, SDNode OpNode,
                                  X86SchedWriteWidths sched,
                                  bit IsCommutable = 0> {
  defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
                                    sched, HasAVX512, IsCommutable>,
              avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
                                    sched, HasBWI, IsCommutable>;
}
// Binop whose source and destination vector types differ (e.g. multishift);
// the broadcast form uses a separately supplied _Brdct type.
multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            SDNode OpNode,X86VectorVTInfo _Src,
                            X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
                            bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            AVX512BIBase, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            AVX512BIBase, EVEX_4V,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;

  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Brdct.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Brdct.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                              (_Brdct.VT (X86VBroadcast
                                                          (_Brdct.ScalarLdFrag addr:$src2))))))>,
                             AVX512BIBase, EVEX_4V, EVEX_B,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Integer add/sub (plain and saturating), multiplies and averages.
defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
                                    SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
                                    SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
                                    SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
                                     SchedWriteVecALU, HasBWI, 1>;
defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
                                     SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
                                    SchedWritePMULLD, HasAVX512, 1>, T8PD;
defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
                                    SchedWriteVecIMul, HasBWI, 1>;
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
                                    SchedWriteVecIMul, HasDQI, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
                                    HasBWI, 1>;
defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
                                     HasBWI, 1>;
defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
                                      SchedWriteVecIMul, HasBWI, 1>, T8PD;
defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
                                   SchedWriteVecALU, HasBWI, 1>;
defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
                                    SchedWriteVecIMul, HasAVX512, 1>, T8PD;
defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
                                     SchedWriteVecIMul, HasAVX512, 1>;
// Instantiate avx512_binop_rm2 at all three widths with i64-typed broadcast.
multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched,
                            AVX512VLVectorVTInfo _SrcVTInfo,
                            AVX512VLVectorVTInfo _DstVTInfo,
                            SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
    defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
                                   _SrcVTInfo.info512, _DstVTInfo.info512,
                                   v8i64_info, IsCommutable>,
                                   EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
  let Predicates = [HasVLX, prd] in {
    defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
                                      _SrcVTInfo.info256, _DstVTInfo.info256,
                                      v4i64x_info, IsCommutable>,
                                      EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
    defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
                                      _SrcVTInfo.info128, _DstVTInfo.info128,
                                      v2i64x_info, IsCommutable>,
                                      EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
  }
}
// VBMI multishift: byte-element source/dest with qword broadcast granularity.
defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
                                       avx512vl_i8_info, avx512vl_i8_info,
                                       X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory form for the pack instructions (dword source only).
multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
                            X86FoldableSchedWrite sched> {
  defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                             (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
                             OpcodeStr,
                             "${src2}"##_Src.BroadcastStr##", $src1",
                             "$src1, ${src2}"##_Src.BroadcastStr,
                             (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
                                              (_Src.VT (X86VBroadcast
                                                        (_Src.ScalarLdFrag addr:$src2))))))>,
                             EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
                             Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Register-register and register-memory forms for packs/pmadd-style ops
// whose source and destination element types differ.
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
                           SDNode OpNode,X86VectorVTInfo _Src,
                           X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
                           bit IsCommutable = 0> {
  defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
                            "$src2, $src1","$src1, $src2",
                            (_Dst.VT (OpNode
                                         (_Src.VT _Src.RC:$src1),
                                         (_Src.VT _Src.RC:$src2))),
                            IsCommutable>,
                            EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                            (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
                            "$src2, $src1", "$src1, $src2",
                            (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
                                             (_Src.LdFrag addr:$src2)))>,
                            EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
                            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// i32 -> i16 pack at all widths; dword source allows a broadcast form.
multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
                                v32i16_info, SchedWriteShuffle.ZMM>,
                avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
                                 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
                                     v16i16x_info, SchedWriteShuffle.YMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
                                      v16i16x_info, SchedWriteShuffle.YMM>,
                                      EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
                                     v8i16x_info, SchedWriteShuffle.XMM>,
                     avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
                                      v8i16x_info, SchedWriteShuffle.XMM>,
                                      EVEX_V128;
  }
}
// i16 -> i8 pack at all widths; word source has no broadcast form.
multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
                                   SDNode OpNode> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
                                SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
                                     v32i8x_info, SchedWriteShuffle.YMM>,
                                     EVEX_V256, VEX_WIG;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
                                     v16i8x_info, SchedWriteShuffle.XMM>,
                                     EVEX_V128, VEX_WIG;
  }
}
// Horizontal multiply-add (vpmaddwd / vpmaddubsw) at all three widths.
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
                         SDNode OpNode, AVX512VLVectorVTInfo _Src,
                         AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
  let Predicates = [HasBWI] in
  defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
                                _Dst.info512, SchedWriteVecIMul.ZMM,
                                IsCommutable>, EVEX_V512;
  let Predicates = [HasBWI, HasVLX] in {
    defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
                                     _Dst.info256, SchedWriteVecIMul.YMM,
                                     IsCommutable>, EVEX_V256;
    defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
                                     _Dst.info128, SchedWriteVecIMul.XMM,
                                     IsCommutable>, EVEX_V128;
  }
}
// Pack and multiply-add instantiations.
defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;

defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
                                avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
                              avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Signed/unsigned min/max for all element sizes. The qword forms have no
// VEX equivalent, hence NotEVEX2VEXConvertible.
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
                                    SchedWriteVecALU, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
                                    SchedWriteVecALU, HasBWI, 1>, T8PD;
defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD;
defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
                                    SchedWriteVecALU, HasAVX512, 1>, T8PD,
                                    NotEVEX2VEXConvertible;
// PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the operands into a ZMM register, multiply, then extract the
// original-width subregister.
let Predicates = [HasDQI, NoVLX] in {
  def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
            (EXTRACT_SUBREG
                (VPMULLQZrr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
// NOTE(review): an exact duplicate of the PMULLQ NoVLX widening patterns
// (same comment, predicates, and pattern heads as the block directly above)
// appeared here; the redundant copy has been removed.
// Lower 256/128-bit i64 min/max through the 512-bit instruction (NoVLX):
// widen operands into ZMM, run Instr, extract the narrow subregister.
multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
  def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
             sub_ymm)>;

  def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
            (EXTRACT_SUBREG
                (Instr
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
                    (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
             sub_xmm)>;
}
// Instantiate the NoVLX qword min/max lowerings.
let Predicates = [HasAVX512, NoVLX] in {
  defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
  defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
  defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
  defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
}
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
// Bitwise logic; only d/q element forms exist in the ISA.
defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
                                  SchedWriteVecLogic, HasAVX512, 1>;
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
                                   SchedWriteVecLogic, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
                                    SchedWriteVecLogic, HasAVX512>;
// Select byte/word-typed logic ops (which have no dedicated instruction)
// through the qword forms, plus f32/f64-broadcast operands through the
// dword/qword broadcast forms.
let Predicates = [HasVLX] in {
  def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
            (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
            (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
            (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
  def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
            (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;

  def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPXORQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
            (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;

  def : Pat<(and VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v4i32 (v4f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ128rmb VR128X:$src1, addr:$src2)>;

  def : Pat<(and VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(or VR128X:$src1,
                (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(xor VR128X:$src1,
                 (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ128rmb VR128X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR128X:$src1,
                      (bc_v2i64 (v2f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ128rmb VR128X:$src1, addr:$src2)>;

  def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
            (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
            (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
            (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
  def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
            (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;

  def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPXORQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
            (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;

  def : Pat<(and VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v8i32 (v8f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZ256rmb VR256X:$src1, addr:$src2)>;

  def : Pat<(and VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(or VR256X:$src1,
                (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(xor VR256X:$src1,
                 (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZ256rmb VR256X:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR256X:$src1,
                      (bc_v4i64 (v4f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZ256rmb VR256X:$src1, addr:$src2)>;
}
// 512-bit versions of the byte/word logic and f32/f64-broadcast patterns.
let Predicates = [HasAVX512] in {
  def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
            (VPANDQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
            (VPORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
            (VPXORQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;
  def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
            (VPANDNQZrr VR512:$src1, VR512:$src2)>;

  def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
            (VPORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
            (VPXORQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
            (VPANDNQZrm VR512:$src1, addr:$src2)>;

  def : Pat<(and VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPXORDZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v16i32 (v16f32 (X86VBroadcast (loadf32 addr:$src2))))),
            (VPANDNDZrmb VR512:$src1, addr:$src2)>;

  def : Pat<(and VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(or VR512:$src1,
                (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(xor VR512:$src1,
                 (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPXORQZrmb VR512:$src1, addr:$src2)>;
  def : Pat<(X86andnp VR512:$src1,
                      (bc_v8i64 (v8f64 (X86VBroadcast (loadf64 addr:$src2))))),
            (VPANDNQZrmb VR512:$src1, addr:$src2)>;
}
// Patterns to catch vselect with different type than logic op.
multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
                                   X86VectorVTInfo _,
                                   X86VectorVTInfo IntInfo> {
  // Masked register-register logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, _.RC:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
             _.RC:$src2)>;

  // Masked register-memory logical operations.
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
                                            (load addr:$src2)))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
             addr:$src2)>;
}
// Broadcast-operand versions of the mixed-type logic lowering patterns.
multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
                                         X86VectorVTInfo _,
                                         X86VectorVTInfo IntInfo> {
  // Register-broadcast logical operations.
  def : Pat<(IntInfo.VT (OpNode _.RC:$src1,
                         (bitconvert (_.VT (X86VBroadcast
                                            (_.ScalarLdFrag addr:$src2)))))),
            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.RC:$src0)),
            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
  def : Pat<(_.VT (vselect _.KRCWM:$mask,
                   (bitconvert
                    (IntInfo.VT (OpNode _.RC:$src1,
                                 (bitconvert (_.VT
                                              (X86VBroadcast
                                               (_.ScalarLdFrag addr:$src2))))))),
                   _.ImmAllZerosV)),
            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
             _.RC:$src1, addr:$src2)>;
}
// Apply the lowering patterns at every vector width (128/256 need VLX).
multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
                                         AVX512VLVectorVTInfo SelectInfo,
                                         AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
                                 IntInfo.info128>;
  defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
                                 IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
                                 IntInfo.info512>;
}
}
// Broadcast variant of the per-width lowering instantiation.
multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
                                               AVX512VLVectorVTInfo SelectInfo,
                                               AVX512VLVectorVTInfo IntInfo> {
let Predicates = [HasVLX] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
                                       SelectInfo.info128, IntInfo.info128>;
  defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
                                       SelectInfo.info256, IntInfo.info256>;
}
let Predicates = [HasAVX512] in {
  defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
                                       SelectInfo.info512, IntInfo.info512>;
}
}
// Cross product: every vselect element type against every logic-op element
// type that lacks its own instruction, routed to the D or Q instruction.
multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
  // i64 vselect with i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
                                       avx512vl_i8_info>;

  // i32 vselect with i64/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
                                       avx512vl_i8_info>;

  // f32 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
                                       avx512vl_i8_info>;

  // f64 vselect with i64/i32/i16/i8 logic op
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i64_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i16_info>;
  defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
                                       avx512vl_i8_info>;

  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
                                             avx512vl_f32_info,
                                             avx512vl_i32_info>;
  defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
                                             avx512vl_f64_info,
                                             avx512vl_i64_info>;
}
// Instantiate the masked-lowering patterns for the four vector logical ops.
5445 defm : avx512_logical_lowering_types<"VPAND", and>;
5446 defm : avx512_logical_lowering_types<"VPOR", or>;
5447 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5448 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5450 //===----------------------------------------------------------------------===//
5451 // AVX-512 FP arithmetic
5452 //===----------------------------------------------------------------------===//
// Scalar FP binary op: maskable intrinsic forms (rr_Int/rm_Int) that operate
// on the full 128-bit register via VecNode with the current rounding mode,
// plus isCodeGenOnly FRC-register forms (rr/rm) matching the plain OpNode so
// scalar IR can select these instructions directly.
5454 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5455 SDNode OpNode, SDNode VecNode,
5456 X86FoldableSchedWrite sched, bit IsCommutable> {
5457 let ExeDomain = _.ExeDomain in {
5458 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5459 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5460 "$src2, $src1", "$src1, $src2",
5461 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
5462 (i32 FROUND_CURRENT)))>,
// Memory form folds a scalar load as the second operand.
5465 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5466 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5467 "$src2, $src1", "$src1, $src2",
5468 (_.VT (VecNode _.RC:$src1,
5469 _.ScalarIntMemCPat:$src2,
5470 (i32 FROUND_CURRENT)))>,
5471 Sched<[sched.Folded, sched.ReadAfterFold]>;
// codegen-only forms on the scalar FR32X/FR64X register classes.
5472 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5473 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5474 (ins _.FRC:$src1, _.FRC:$src2),
5475 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5476 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5478 let isCommutable = IsCommutable;
5480 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5481 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5482 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5483 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5484 (_.ScalarLdFrag addr:$src2)))]>,
5485 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar FP binary op with an explicit static rounding-mode operand ($rc);
// EVEX_B + EVEX_RC encode the embedded rounding control.
5490 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5491 SDNode VecNode, X86FoldableSchedWrite sched,
5492 bit IsCommutable = 0> {
5493 let ExeDomain = _.ExeDomain in
5494 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5495 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5496 "$rc, $src2, $src1", "$src1, $src2, $rc",
5497 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5498 (i32 imm:$rc)), IsCommutable>,
5499 EVEX_B, EVEX_RC, Sched<[sched]>;
// Scalar FP binary op variant for ops with a suppress-all-exceptions form
// (min/max): plain intrinsic forms use VecNode without a rounding operand,
// codegen-only FRC forms use OpNode, and the {sae} form (rrb_Int) uses
// SaeNode with FROUND_NO_EXC under EVEX_B.
5501 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5502 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5503 X86FoldableSchedWrite sched, bit IsCommutable> {
5504 let ExeDomain = _.ExeDomain in {
5505 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5506 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5507 "$src2, $src1", "$src1, $src2",
5508 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5511 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5512 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5513 "$src2, $src1", "$src1, $src2",
5514 (_.VT (VecNode _.RC:$src1,
5515 _.ScalarIntMemCPat:$src2))>,
5516 Sched<[sched.Folded, sched.ReadAfterFold]>;
// codegen-only scalar-register forms matching the plain SDNode.
5518 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5519 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5520 (ins _.FRC:$src1, _.FRC:$src2),
5521 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5522 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5524 let isCommutable = IsCommutable;
5526 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5527 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5528 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5529 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5530 (_.ScalarLdFrag addr:$src2)))]>,
5531 Sched<[sched.Folded, sched.ReadAfterFold]>;
// {sae} form: exceptions suppressed, encoded via EVEX_B.
5534 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5535 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5536 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5537 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5538 (i32 FROUND_NO_EXC))>, EVEX_B,
// Expands a scalar binop into SS (f32) and SD (f64) forms, each combining
// the current-rounding and explicit-rounding variants, with the appropriate
// prefix/encoding traits (XS vs XD+VEX_W).
5543 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5544 SDNode VecNode, X86SchedWriteSizes sched,
5546 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5547 sched.PS.Scl, IsCommutable>,
5548 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
5549 sched.PS.Scl, IsCommutable>,
5550 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5551 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5552 sched.PD.Scl, IsCommutable>,
5553 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
5554 sched.PD.Scl, IsCommutable>,
5555 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// SS/SD expansion for the SAE-capable scalar ops (min/max).
5558 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5559 SDNode VecNode, SDNode SaeNode,
5560 X86SchedWriteSizes sched, bit IsCommutable> {
5561 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5562 VecNode, SaeNode, sched.PS.Scl, IsCommutable>,
5563 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5564 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5565 VecNode, SaeNode, sched.PD.Scl, IsCommutable>,
5566 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar add/mul/sub/div (rounding variants) and min/max (SAE variants).
// add/mul are commutable; sub/div/min/max are not (min/max because of their
// NaN/signed-zero ordering semantics).
5568 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds,
5569 SchedWriteFAddSizes, 1>;
5570 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds,
5571 SchedWriteFMulSizes, 1>;
5572 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds,
5573 SchedWriteFAddSizes, 0>;
5574 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds,
5575 SchedWriteFDivSizes, 0>;
5576 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
5577 SchedWriteFCmpSizes, 0>;
5578 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
5579 SchedWriteFCmpSizes, 0>;
5581 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5582 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Commutable codegen-only scalar min/max forms (X86fminc/X86fmaxc).
// NOTE(review): the name's "comutable" spelling is long-standing; it cannot
// be corrected here without touching every external reference.
5583 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5584 X86VectorVTInfo _, SDNode OpNode,
5585 X86FoldableSchedWrite sched> {
5586 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5587 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5588 (ins _.FRC:$src1, _.FRC:$src2),
5589 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5590 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5592 let isCommutable = 1;
5594 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5595 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5596 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5597 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5598 (_.ScalarLdFrag addr:$src2)))]>,
5599 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Commutable scalar min/max instantiations (see comment above the
// avx512_comutable_binop_s multiclass).
5602 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5603 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5604 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5606 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5607 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5608 VEX_LIG, EVEX_CD8<64, CD8VT1>;
5610 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5611 SchedWriteFCmp.Scl>, XS, EVEX_4V,
5612 VEX_LIG, EVEX_CD8<32, CD8VT1>;
5614 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5615 SchedWriteFCmp.Scl>, XD, VEX_W, EVEX_4V,
5616 VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Packed FP binary op for one vector width: register form (rr), memory form
// (rm), and broadcast-memory form (rmb). hasSideEffects = 0 with mayLoad on
// the memory forms because OpNode may be null_frag (pattern-less).
5618 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5619 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5621 bit IsKZCommutable = IsCommutable> {
5622 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5623 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5624 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5625 "$src2, $src1", "$src1, $src2",
5626 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable, 0,
5628 EVEX_4V, Sched<[sched]>;
5629 let mayLoad = 1 in {
5630 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5631 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5632 "$src2, $src1", "$src1, $src2",
5633 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5634 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: a single scalar element splatted as operand 2.
5635 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5636 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5637 "${src2}"##_.BroadcastStr##", $src1",
5638 "$src1, ${src2}"##_.BroadcastStr,
5639 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5640 (_.ScalarLdFrag addr:$src2))))>,
5642 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FP op with explicit static rounding-mode operand ($rc);
// EVEX_B + EVEX_RC encode the embedded rounding control.
5647 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5648 SDPatternOperator OpNodeRnd,
5649 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5650 let ExeDomain = _.ExeDomain in
5651 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5652 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
5653 "$rc, $src2, $src1", "$src1, $src2, $rc",
5654 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
5655 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP op with {sae} (suppress-all-exceptions) form; the rounding
// argument is pinned to FROUND_NO_EXC.
5658 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5659 SDPatternOperator OpNodeRnd,
5660 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5661 let ExeDomain = _.ExeDomain in
5662 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5663 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5664 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5665 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
5666 EVEX_4V, EVEX_B, Sched<[sched]>;
// Expands a packed FP binop into PS/PD forms at 512-bit (guarded only by
// prd) and 128/256-bit (additionally guarded by HasVLX).
// IsPD128Commutable allows the v2f64 form to differ in commutability.
5669 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5670 Predicate prd, X86SchedWriteSizes sched,
5671 bit IsCommutable = 0,
5672 bit IsPD128Commutable = IsCommutable> {
5673 let Predicates = [prd] in {
5674 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5675 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5676 EVEX_CD8<32, CD8VF>;
5677 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5678 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5679 EVEX_CD8<64, CD8VF>;
5682 // Define only if AVX512VL feature is present.
5683 let Predicates = [prd, HasVLX] in {
5684 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5685 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5686 EVEX_CD8<32, CD8VF>;
5687 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5688 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5689 EVEX_CD8<32, CD8VF>;
5690 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5691 sched.PD.XMM, IsPD128Commutable,
5692 IsCommutable>, EVEX_V128, PD, VEX_W,
5693 EVEX_CD8<64, CD8VF>;
5694 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5695 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5696 EVEX_CD8<64, CD8VF>;
// Embedded-rounding forms exist only at 512-bit width.
5700 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5701 X86SchedWriteSizes sched> {
5702 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5704 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5705 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5707 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// {sae} forms exist only at 512-bit width.
5710 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5711 X86SchedWriteSizes sched> {
5712 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5714 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5715 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5717 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic: add/mul/sub/div get rounding forms, min/max get
// {sae} forms. The codegen-only VMINC/VMAXC use the commutable nodes.
// The logical ops use null_frag (no ISel pattern here; lowered via the
// VP* logical patterns above) and require DQI.
5720 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5721 SchedWriteFAddSizes, 1>,
5722 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5723 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5724 SchedWriteFMulSizes, 1>,
5725 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5726 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512,
5727 SchedWriteFAddSizes>,
5728 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5729 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512,
5730 SchedWriteFDivSizes>,
5731 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5732 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5733 SchedWriteFCmpSizes, 0>,
5734 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmpSizes>;
5735 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5736 SchedWriteFCmpSizes, 0>,
5737 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmpSizes>;
5738 let isCodeGenOnly = 1 in {
5739 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5740 SchedWriteFCmpSizes, 1>;
5741 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5742 SchedWriteFCmpSizes, 1>;
5744 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5745 SchedWriteFLogicSizes, 1>;
5746 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5747 SchedWriteFLogicSizes, 0>;
5748 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5749 SchedWriteFLogicSizes, 1>;
5750 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5751 SchedWriteFLogicSizes, 1>;
// Lower scalar f32/f64 logical ops onto the 128-bit packed VL instructions:
// copy the scalar registers into VR128X, run the packed op, and (per the
// surrounding patterns) extract the scalar result.
5753 let Predicates = [HasVLX,HasDQI] in {
5754 // Use packed logical operations for scalar ops.
5755 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5757 (v2f64 (VANDPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5758 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5760 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5762 (v2f64 (VORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5763 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5765 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5767 (v2f64 (VXORPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5768 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
5770 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5772 (v2f64 (VANDNPDZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
5773 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)))),
// Same four patterns for f32 on v4f32.
5776 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5778 (v4f32 (VANDPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5779 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5781 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5783 (v4f32 (VORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5784 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5786 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5788 (v4f32 (VXORPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5789 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
5791 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5793 (v4f32 (VANDNPSZ128rr (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
5794 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)))),
// Packed VSCALEF-style op: rr/rm/rmb forms, all carrying the current
// rounding mode (FROUND_CURRENT) as the third operand of OpNode.
5798 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5799 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5800 let ExeDomain = _.ExeDomain in {
5801 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5802 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5803 "$src2, $src1", "$src1, $src2",
5804 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5805 EVEX_4V, Sched<[sched]>;
5806 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5807 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5808 "$src2, $src1", "$src1, $src2",
5809 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>,
5810 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory form.
5811 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5812 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5813 "${src2}"##_.BroadcastStr##", $src1",
5814 "$src1, ${src2}"##_.BroadcastStr,
5815 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5816 (_.ScalarLdFrag addr:$src2))),
5817 (i32 FROUND_CURRENT))>,
5818 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar VSCALEF-style op: rr/rm intrinsic forms with FROUND_CURRENT.
5822 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5823 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5824 let ExeDomain = _.ExeDomain in {
5825 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5826 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5827 "$src2, $src1", "$src1, $src2",
5828 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>,
5830 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5831 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5832 "$src2, $src1", "$src1, $src2",
5833 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5834 (i32 FROUND_CURRENT))>,
5835 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Full VSCALEF family: packed PS/PD at all widths (512-bit always, 128/256
// under HasVLX) plus scalar SS/SD using a separate opcode (opcScaler), each
// combined with its rounding-mode variant.
5839 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5840 SDNode OpNode, SDNode OpNodeScal,
5841 X86SchedWriteWidths sched> {
5842 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5843 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v16f32_info>,
5844 EVEX_V512, EVEX_CD8<32, CD8VF>;
5845 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5846 avx512_fp_round_packed<opc, OpcodeStr, OpNode, sched.ZMM, v8f64_info>,
5847 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5848 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f32x_info>,
5849 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, sched.Scl>,
5850 EVEX_4V,EVEX_CD8<32, CD8VT1>;
5851 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, sched.Scl, f64x_info>,
5852 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, sched.Scl>,
5853 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5855 // Define only if AVX512VL feature is present.
5856 let Predicates = [HasVLX] in {
5857 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v4f32x_info>,
5858 EVEX_V128, EVEX_CD8<32, CD8VF>;
5859 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v8f32x_info>,
5860 EVEX_V256, EVEX_CD8<32, CD8VF>;
5861 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.XMM, v2f64x_info>,
5862 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5863 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, sched.YMM, v4f64x_info>,
5864 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF has no VEX equivalent, hence NotEVEX2VEXConvertible.
5867 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs,
5868 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5870 //===----------------------------------------------------------------------===//
5871 // AVX-512 VPTESTM instructions
5872 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM core: a mask-producing compare of (src1 & src2) against
// zero, in register (rr) and memory (rm) forms, plus patterns mapping a
// direct compare-with-zero of one register to the rr form with the source
// duplicated (src AND src == src).
5874 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5875 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5877 let ExeDomain = _.ExeDomain in {
5878 let isCommutable = 1 in
5879 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5880 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5881 "$src2, $src1", "$src1, $src2",
5882 (OpNode (and _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)>,
5883 EVEX_4V, Sched<[sched]>;
5884 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5885 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5886 "$src2, $src1", "$src1, $src2",
5887 (OpNode (and _.RC:$src1, (_.LdFrag addr:$src2)),
5889 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5890 Sched<[sched.Folded, sched.ReadAfterFold]>;
5893 // Patterns for compare with 0 that just use the same source twice.
5894 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5895 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rr")
5896 _.RC:$src, _.RC:$src))>;
// Masked variant of the same-source compare-with-zero pattern.
5898 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5899 (_.KVT (!cast<Instruction>(Name # _.ZSuffix # "rrk")
5900 _.KRC:$mask, _.RC:$src, _.RC:$src))>;
// Broadcast-memory (rmb) form of the vptest compare; only defined for the
// D/Q element sizes that support embedded broadcast.
5903 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5904 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5905 let ExeDomain = _.ExeDomain in
5906 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5907 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5908 "${src2}"##_.BroadcastStr##", $src1",
5909 "$src1, ${src2}"##_.BroadcastStr,
5910 (OpNode (and _.RC:$src1,
5912 (_.ScalarLdFrag addr:$src2))),
5914 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5915 Sched<[sched.Folded, sched.ReadAfterFold]>;
5918 // Use the 512-bit version to implement the 128/256-bit forms when VLX is
// not available (NoVLX).
// NoVLX fallback: widen the 128/256-bit operands into a 512-bit register
// with INSERT_SUBREG over IMPLICIT_DEF (upper elements undefined but
// irrelevant to the low mask bits consumed), run the Z-suffixed 512-bit
// instruction, and copy the result back to the narrow mask class.
5919 multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo,
5920 X86VectorVTInfo _, string Name> {
5921 def : Pat<(_.KVT (OpNode (and _.RC:$src1, _.RC:$src2),
5923 (_.KVT (COPY_TO_REGCLASS
5924 (!cast<Instruction>(Name # "Zrr")
5925 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5926 _.RC:$src1, _.SubRegIdx),
5927 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5928 _.RC:$src2, _.SubRegIdx)),
// Masked variant: the narrow mask is first copied up to the wide mask class.
5931 def : Pat<(_.KVT (and _.KRC:$mask,
5932 (OpNode (and _.RC:$src1, _.RC:$src2),
5935 (!cast<Instruction>(Name # "Zrrk")
5936 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5937 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5938 _.RC:$src1, _.SubRegIdx),
5939 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5940 _.RC:$src2, _.SubRegIdx)),
// Compare-with-zero variants: same source used for both operands.
5943 def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)),
5944 (_.KVT (COPY_TO_REGCLASS
5945 (!cast<Instruction>(Name # "Zrr")
5946 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5947 _.RC:$src, _.SubRegIdx),
5948 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5949 _.RC:$src, _.SubRegIdx)),
5952 def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))),
5954 (!cast<Instruction>(Name # "Zrrk")
5955 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
5956 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5957 _.RC:$src, _.SubRegIdx),
5958 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5959 _.RC:$src, _.SubRegIdx)),
// D/Q vptest size expansion: 512-bit under HasAVX512, 128/256-bit native
// forms under HasVLX, and the widening fallback patterns under NoVLX.
5963 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5964 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5965 let Predicates = [HasAVX512] in
5966 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, NAME>,
5967 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5969 let Predicates = [HasAVX512, HasVLX] in {
5970 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, NAME>,
5971 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5972 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, NAME>,
5973 avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
5975 let Predicates = [HasAVX512, NoVLX] in {
5976 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, NAME>;
5977 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, NAME>;
// Doubleword and quadword element forms ("d"/"q" mnemonic suffixes).
5981 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode,
5982 X86SchedWriteWidths sched> {
5983 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched,
5985 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched,
5986 avx512vl_i64_info>, VEX_W;
// Word/byte vptest forms: require BWI; 128/256-bit need VLX as well, with
// widening fallbacks under [HasBWI, NoVLX]. No broadcast (rmb) forms —
// embedded broadcast does not exist for byte/word elements.
5989 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5990 PatFrag OpNode, X86SchedWriteWidths sched> {
5991 let Predicates = [HasBWI] in {
5992 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM,
5993 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5994 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM,
5995 v64i8_info, NAME#"B">, EVEX_V512;
5997 let Predicates = [HasVLX, HasBWI] in {
5999 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM,
6000 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
6001 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM,
6002 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
6003 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM,
6004 v32i8x_info, NAME#"B">, EVEX_V256;
6005 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM,
6006 v16i8x_info, NAME#"B">, EVEX_V128;
6009 let Predicates = [HasBWI, NoVLX] in {
6010 defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, NAME#"B">;
6011 defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, NAME#"B">;
6012 defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, NAME#"W">;
6013 defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, NAME#"W">;
6017 // These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
6018 // as commutable here because we already canonicalized all zeros vectors to the
6019 // RHS during lowering.
// Mask-producing integer compare fragments: EQ selects vptestnm
// (x & y == 0), NE selects vptestm (x & y != 0).
6020 def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
6021 (setcc node:$src1, node:$src2, SETEQ)>;
6022 def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
6023 (setcc node:$src1, node:$src2, SETNE)>;
// Convenience wrapper combining the byte/word and dword/qword expansions.
6025 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
6026 PatFrag OpNode, X86SchedWriteWidths sched> :
6027 avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>,
6028 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>;
// VPTESTM matches setcc-NE, VPTESTNM matches setcc-EQ (see the PatFrag
// comment above); they share opcodes and differ in the mandatory prefix.
6030 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem,
6031 SchedWriteVecLogic>, T8PD;
6032 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm,
6033 SchedWriteVecLogic>, T8XS;
// Patterns matching a vptest compare whose AND was performed at a different
// element width (AndInfo) and bitconverted to the compare width; rr/rm and
// their masked (rrk/rmk) variants.
6036 multiclass avx512_vptest_lowering_pats<string InstrStr, PatFrag OpNode,
6038 X86VectorVTInfo AndInfo> {
6039 def : Pat<(_.KVT (OpNode (bitconvert
6040 (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
6042 (!cast<Instruction>(InstrStr # "rr") _.RC:$src1, _.RC:$src2)>;
6044 def : Pat<(_.KVT (and _.KRC:$mask,
6046 (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
6048 (!cast<Instruction>(InstrStr # "rrk") _.KRC:$mask, _.RC:$src1,
// Memory-operand variants: the second AND operand is loaded.
6051 def : Pat<(_.KVT (OpNode (bitconvert
6052 (AndInfo.VT (and _.RC:$src1,
6053 (AndInfo.LdFrag addr:$src2)))),
6055 (!cast<Instruction>(InstrStr # "rm") _.RC:$src1, addr:$src2)>;
6057 def : Pat<(_.KVT (and _.KRC:$mask,
6059 (AndInfo.VT (and _.RC:$src1,
6060 (AndInfo.LdFrag addr:$src2)))),
6062 (!cast<Instruction>(InstrStr # "rmk") _.KRC:$mask, _.RC:$src1,
6066 // Patterns to use 512-bit instructions when 128/256 are not available.
// Combines the mixed-width AND matching of avx512_vptest_lowering_pats with
// the NoVLX widening technique of avx512_vptest_lowering.
6067 multiclass avx512_vptest_lowering_wide_pats<string InstrStr, PatFrag OpNode,
6069 X86VectorVTInfo AndInfo,
6070 X86VectorVTInfo ExtendInfo> {
6071 def : Pat<(_.KVT (OpNode (bitconvert
6072 (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
6074 (_.KVT (COPY_TO_REGCLASS
6075 (!cast<Instruction>(InstrStr#"rr")
6076 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
6077 _.RC:$src1, _.SubRegIdx),
6078 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
6079 _.RC:$src2, _.SubRegIdx)),
// Masked variant with the mask widened via COPY_TO_REGCLASS.
6082 def : Pat<(_.KVT (and _.KRC:$mask,
6084 (AndInfo.VT (and _.RC:$src1, _.RC:$src2))),
6087 (!cast<Instruction>(InstrStr#"rrk")
6088 (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
6089 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
6090 _.RC:$src1, _.SubRegIdx),
6091 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
6092 _.RC:$src2, _.SubRegIdx)),
// Size expansion for the mixed-width vptest patterns: native narrow forms
// under [prd, HasVLX], 512-bit under prd alone, and 512-bit widening
// fallbacks for the narrow widths under [prd, NoVLX].
6096 multiclass avx512_vptest_lowering_sizes<string InstrStr, PatFrag OpNode,
6098 AVX512VLVectorVTInfo CmpInfo,
6099 AVX512VLVectorVTInfo AndInfo> {
6100 let Predicates = [prd, HasVLX] in {
6101 defm : avx512_vptest_lowering_pats<InstrStr#"Z128", OpNode,
6102 CmpInfo.info128, AndInfo.info128>;
6103 defm : avx512_vptest_lowering_pats<InstrStr#"Z256", OpNode,
6104 CmpInfo.info256, AndInfo.info256>;
6106 let Predicates = [prd] in {
6107 defm : avx512_vptest_lowering_pats<InstrStr#"Z", OpNode,
6108 CmpInfo.info512, AndInfo.info512>;
6111 let Predicates = [prd, NoVLX] in {
6112 defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
6113 CmpInfo.info128, AndInfo.info128,
6115 defm : avx512_vptest_lowering_wide_pats<InstrStr#"Z", OpNode,
6116 CmpInfo.info256, AndInfo.info256,
// Full cross-product of compare element size (B/W need BWI; D/Q need only
// AVX512) against every other AND element size, so a vptest at one width can
// absorb an AND performed at any other width.
6121 multiclass avx512_vptest_lowering_types<string InstrStr, PatFrag OpNode> {
6122 defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
6123 avx512vl_i8_info, avx512vl_i16_info>;
6124 defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
6125 avx512vl_i8_info, avx512vl_i32_info>;
6126 defm : avx512_vptest_lowering_sizes<InstrStr # "B", OpNode, HasBWI,
6127 avx512vl_i8_info, avx512vl_i64_info>;
6129 defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
6130 avx512vl_i16_info, avx512vl_i8_info>;
6131 defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
6132 avx512vl_i16_info, avx512vl_i32_info>;
6133 defm : avx512_vptest_lowering_sizes<InstrStr # "W", OpNode, HasBWI,
6134 avx512vl_i16_info, avx512vl_i64_info>;
6136 defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
6137 avx512vl_i32_info, avx512vl_i8_info>;
6138 defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
6139 avx512vl_i32_info, avx512vl_i16_info>;
6140 defm : avx512_vptest_lowering_sizes<InstrStr # "D", OpNode, HasAVX512,
6141 avx512vl_i32_info, avx512vl_i64_info>;
6143 defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
6144 avx512vl_i64_info, avx512vl_i8_info>;
6145 defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
6146 avx512vl_i64_info, avx512vl_i16_info>;
6147 defm : avx512_vptest_lowering_sizes<InstrStr # "Q", OpNode, HasAVX512,
6148 avx512vl_i64_info, avx512vl_i32_info>;
// Instantiate the mixed-width patterns for both test instructions.
6151 defm : avx512_vptest_lowering_types<"VPTESTM", X86pcmpnem>;
6152 defm : avx512_vptest_lowering_types<"VPTESTNM", X86pcmpeqm>;
6154 //===----------------------------------------------------------------------===//
6155 // AVX-512 Shift instructions
6156 //===----------------------------------------------------------------------===//
// Shift-by-immediate: register (ri) and full-vector memory (mi) forms; the
// 8-bit immediate is the shift count.
6158 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
6159 string OpcodeStr, SDNode OpNode,
6160 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6161 let ExeDomain = _.ExeDomain in {
6162 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
6163 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
6164 "$src2, $src1", "$src1, $src2",
6165 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
6167 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6168 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
6169 "$src2, $src1", "$src1, $src2",
6170 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
6172 Sched<[sched.Folded]>;
// Shift-by-immediate with a broadcast memory source (mbi): the shifted
// vector is a splat of one scalar element, encoded with EVEX_B.
6176 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
6177 string OpcodeStr, SDNode OpNode,
6178 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6179 let ExeDomain = _.ExeDomain in
6180 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
6181 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
6182 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
6183 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
6184 EVEX_B, Sched<[sched.Folded]>;
// Shift-by-vector-count: the count (src2) is always a 128-bit XMM value
// regardless of the destination vector width.
6187 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6188 X86FoldableSchedWrite sched, ValueType SrcVT,
6189 X86VectorVTInfo _> {
6190 // src2 is always 128-bit
6191 let ExeDomain = _.ExeDomain in {
6192 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6193 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
6194 "$src2, $src1", "$src1, $src2",
6195 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
6196 AVX512BIBase, EVEX_4V, Sched<[sched]>;
6197 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6198 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
6199 "$src2, $src1", "$src1, $src2",
6200 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
6202 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate avx512_shift_rrm at 512/256/128-bit widths. The 128/256-bit
// forms additionally require VLX. The CD8 tuple differs per width
// (CD8VQ/CD8VH/CD8VF) because the memory operand is always 128 bits.
6206 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6207 X86SchedWriteWidths sched, ValueType SrcVT,
6208 AVX512VLVectorVTInfo VTInfo,
6210 let Predicates = [prd] in
6211 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
6212 VTInfo.info512>, EVEX_V512,
6213 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
6214 let Predicates = [prd, HasVLX] in {
6215 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
6216 VTInfo.info256>, EVEX_V256,
6217 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
6218 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
6219 VTInfo.info128>, EVEX_V128,
6220 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// Expand one shift operation to its D (i32, AVX512F), Q (i64, AVX512F,
// VEX.W) and W (i16, BWI) element-size variants. NotEVEX2VEXConvertibleQ
// blocks EVEX->VEX compression for the Q form when its VEX counterpart
// has different semantics (e.g. VPSRAQ has no VEX encoding).
6224 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
6225 string OpcodeStr, SDNode OpNode,
6226 X86SchedWriteWidths sched,
6227 bit NotEVEX2VEXConvertibleQ = 0> {
6228 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
6229 avx512vl_i32_info, HasAVX512>;
6230 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6231 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
6232 avx512vl_i64_info, HasAVX512>, VEX_W;
6233 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
6234 avx512vl_i16_info, HasBWI>;
// Instantiate the shift-by-immediate (rmi) and broadcast (rmbi) forms at
// 512/256/128-bit widths; sub-512 widths require VLX.
6237 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6238 string OpcodeStr, SDNode OpNode,
6239 X86SchedWriteWidths sched,
6240 AVX512VLVectorVTInfo VTInfo> {
6241 let Predicates = [HasAVX512] in
6242 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6243 sched.ZMM, VTInfo.info512>,
6244 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
6245 VTInfo.info512>, EVEX_V512;
6246 let Predicates = [HasAVX512, HasVLX] in {
6247 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6248 sched.YMM, VTInfo.info256>,
6249 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
6250 VTInfo.info256>, EVEX_V256;
6251 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6252 sched.XMM, VTInfo.info128>,
6253 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
6254 VTInfo.info128>, EVEX_V128;
// Word-element (i16) shift-by-immediate variants; gated on BWI and, for
// 128/256-bit widths, VLX. No broadcast form exists for word elements.
6258 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
6259 string OpcodeStr, SDNode OpNode,
6260 X86SchedWriteWidths sched> {
6261 let Predicates = [HasBWI] in
6262 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6263 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
6264 let Predicates = [HasVLX, HasBWI] in {
6265 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6266 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
6267 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6268 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
// D (i32) and Q (i64) shift-by-immediate variants; the Q form carries
// VEX.W and can opt out of EVEX->VEX compression (see
// NotEVEX2VEXConvertibleQ in avx512_shift_types).
6272 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
6273 Format ImmFormR, Format ImmFormM,
6274 string OpcodeStr, SDNode OpNode,
6275 X86SchedWriteWidths sched,
6276 bit NotEVEX2VEXConvertibleQ = 0> {
6277 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
6278 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
6279 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
6280 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
6281 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Immediate-count shifts/rotates. Opcodes 0x71/0x72/0x73 use the ModRM
// reg field (MRM0..MRM6) to select the operation; VPSRA's Q form is marked
// not EVEX->VEX convertible (VEX has no VPSRAQ).
6284 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
6285 SchedWriteVecShiftImm>,
6286 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
6287 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6289 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
6290 SchedWriteVecShiftImm>,
6291 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
6292 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6294 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
6295 SchedWriteVecShiftImm, 1>,
6296 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
6297 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6299 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
6300 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
6301 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
6302 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
// Shifts whose count comes from an XMM register/memory operand.
6304 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
6305 SchedWriteVecShift>;
6306 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
6307 SchedWriteVecShift, 1>;
6308 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
6309 SchedWriteVecShift>;
6311 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// The narrow operand is widened into a zmm register via INSERT_SUBREG
// (upper lanes undefined), the 512-bit instruction runs, and the original
// sub-register is extracted. Only Q-element arithmetic shifts need this:
// without VLX there is no 128/256-bit VPSRAQ encoding.
6312 let Predicates = [HasAVX512, NoVLX] in {
6313 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
6314 (EXTRACT_SUBREG (v8i64
6316 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6317 VR128X:$src2)), sub_ymm)>;
6319 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6320 (EXTRACT_SUBREG (v8i64
6322 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6323 VR128X:$src2)), sub_xmm)>;
6325 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
6326 (EXTRACT_SUBREG (v8i64
6328 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6329 imm:$src2)), sub_ymm)>;
6331 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
6332 (EXTRACT_SUBREG (v8i64
6334 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6335 imm:$src2)), sub_xmm)>;
6338 //===-------------------------------------------------------------------===//
6339 // Variable Bit Shifts
6340 //===-------------------------------------------------------------------===//
// Per-element (variable) shift: each destination element is shifted by the
// count in the corresponding element of $src2. rr and rm (full-vector load)
// forms for one vector width.
6342 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
6343 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6344 let ExeDomain = _.ExeDomain in {
6345 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6346 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
6347 "$src2, $src1", "$src1, $src2",
6348 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
6349 AVX5128IBase, EVEX_4V, Sched<[sched]>;
6350 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6351 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
6352 "$src2, $src1", "$src1, $src2",
6353 (_.VT (OpNode _.RC:$src1,
6354 (_.VT (_.LdFrag addr:$src2))))>,
6355 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6356 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb, EVEX.b) form of the variable shift: the per-element
// counts come from a single splatted scalar load.
6360 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
6361 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
6362 let ExeDomain = _.ExeDomain in
6363 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6364 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6365 "${src2}"##_.BroadcastStr##", $src1",
6366 "$src1, ${src2}"##_.BroadcastStr,
6367 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
6368 (_.ScalarLdFrag addr:$src2)))))>,
6369 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6370 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the variable shift (rr/rm + rmb broadcast) at 512/256/128-bit
// widths; sub-512 widths require VLX.
6373 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6374 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
6375 let Predicates = [HasAVX512] in
6376 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
6377 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
6379 let Predicates = [HasAVX512, HasVLX] in {
6380 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
6381 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
6382 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
6383 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// Expand a variable shift to its D (i32) and Q (i64, VEX.W) element-size
// variants; both share the same opcode, distinguished by the W bit.
6387 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
6388 SDNode OpNode, X86SchedWriteWidths sched> {
6389 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
6391 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
6392 avx512vl_i64_info>, VEX_W;
6395 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the 128/256-bit operands into zmm registers (upper lanes
// undefined), executes the named 512-bit instruction (OpcodeStr#"Zrr"),
// and extracts the original sub-register.
6396 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6397 SDNode OpNode, list<Predicate> p> {
6398 let Predicates = p in {
6399 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6400 (_.info256.VT _.info256.RC:$src2))),
6402 (!cast<Instruction>(OpcodeStr#"Zrr")
6403 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6404 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6407 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6408 (_.info128.VT _.info128.RC:$src2))),
6410 (!cast<Instruction>(OpcodeStr#"Zrr")
6411 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6412 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element (i16) variable shift variants; BWI-only, with 128/256-bit
// widths additionally gated on VLX. No broadcast form for word elements.
6416 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6417 SDNode OpNode, X86SchedWriteWidths sched> {
6418 let Predicates = [HasBWI] in
6419 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6421 let Predicates = [HasVLX, HasBWI] in {
6423 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6425 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Variable shifts (VPSLLV/VPSRAV/VPSRLV) selected from the generic
// shl/sra/srl nodes, plus variable rotates (VPRORV/VPROLV) from rotr/rotl.
// The NoVLX lowerings below cover the cases with no narrow EVEX (or, for
// i16, no AVX2) encoding by widening to 512 bits.
6430 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SchedWriteVarVecShift>,
6431 avx512_var_shift_w<0x12, "vpsllvw", shl, SchedWriteVarVecShift>;
6433 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SchedWriteVarVecShift>,
6434 avx512_var_shift_w<0x11, "vpsravw", sra, SchedWriteVarVecShift>;
6436 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SchedWriteVarVecShift>,
6437 avx512_var_shift_w<0x10, "vpsrlvw", srl, SchedWriteVarVecShift>;
6439 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6440 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6442 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
6443 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
6444 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
6445 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
6447 // Special handling for VPSRAV intrinsics.
// Maps the intrinsic-style node (e.g. X86vsrav) plus its masked vselect
// wrappers onto the already-defined instructions: plain (rr/rm), merge-
// masked (rrk/rmk with $src0 passthrough) and zero-masked (rrkz/rmkz).
6448 multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
6449 X86VectorVTInfo _, list<Predicate> p> {
6450 let Predicates = p in {
6451 def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
6452 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
6454 def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
6455 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
6456 _.RC:$src1, addr:$src2)>;
6457 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6458 (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
6459 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
6460 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
6461 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6462 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
6464 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
6465 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6466 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6467 (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
6468 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
6469 _.RC:$src1, _.RC:$src2)>;
6470 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6471 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
6473 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
6474 _.RC:$src1, addr:$src2)>;
// Extends the intrinsic lowering above with broadcast-load (rmb/rmbk/rmbkz)
// patterns for element types that have an embedded-broadcast form (D/Q).
// Inherits all non-broadcast patterns from avx512_var_shift_int_lowering.
6478 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
6480 list<Predicate> p> :
6481 avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
6482 let Predicates = p in {
6483 def : Pat<(_.VT (OpNode _.RC:$src1,
6484 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
6485 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
6486 _.RC:$src1, addr:$src2)>;
6487 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6489 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6491 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
6492 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
6493 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6495 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
6497 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
6498 _.RC:$src1, addr:$src2)>;
// Apply the intrinsic lowering across all three vector widths of a
// VL info bundle (512-bit gated on p alone; narrower widths also need VLX
// on the lines not visible in this extract).
6502 multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode,
6503 AVX512VLVectorVTInfo VTInfo,
6505 defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>;
6506 defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256,
6508 defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128,
// Same as above, but including the broadcast-load patterns (D/Q elements).
6512 multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode,
6513 AVX512VLVectorVTInfo VTInfo,
6515 defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>;
6516 defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256,
6518 defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128,
// Intrinsic-node lowerings for the variable shifts. Word (i16) forms have
// no broadcast encoding, so they use the non-mb multiclass.
6522 defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info,
6524 defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav,
6525 avx512vl_i32_info, HasAVX512>;
6526 defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav,
6527 avx512vl_i64_info, HasAVX512>;
6529 defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info,
6531 defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv,
6532 avx512vl_i32_info, HasAVX512>;
6533 defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv,
6534 avx512vl_i64_info, HasAVX512>;
6536 defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info,
6538 defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv,
6539 avx512vl_i32_info, HasAVX512>;
6540 defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv,
6541 avx512vl_i64_info, HasAVX512>;
6544 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widen-execute-extract trick as the VPSRA lowerings above; rotates
// have no VEX encoding at all, so NoVLX must always go through zmm.
6545 let Predicates = [HasAVX512, NoVLX] in {
6546 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6547 (EXTRACT_SUBREG (v8i64
6549 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6550 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6552 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6553 (EXTRACT_SUBREG (v8i64
6555 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6556 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6559 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6560 (EXTRACT_SUBREG (v16i32
6562 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6563 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6565 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6566 (EXTRACT_SUBREG (v16i32
6568 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6569 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-rotate (VPROLI) variants of the same lowering.
6572 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
6573 (EXTRACT_SUBREG (v8i64
6575 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6576 imm:$src2)), sub_xmm)>;
6577 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
6578 (EXTRACT_SUBREG (v8i64
6580 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6581 imm:$src2)), sub_ymm)>;
6583 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
6584 (EXTRACT_SUBREG (v16i32
6586 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6587 imm:$src2)), sub_xmm)>;
6588 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
6589 (EXTRACT_SUBREG (v16i32
6591 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6592 imm:$src2)), sub_ymm)>;
6595 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the VPROL block above for right rotates (rotr / X86vrotri).
6596 let Predicates = [HasAVX512, NoVLX] in {
6597 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6598 (EXTRACT_SUBREG (v8i64
6600 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6601 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6603 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6604 (EXTRACT_SUBREG (v8i64
6606 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6607 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6610 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6611 (EXTRACT_SUBREG (v16i32
6613 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6614 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6616 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6617 (EXTRACT_SUBREG (v16i32
6619 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6620 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-rotate (VPRORI) variants.
6623 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
6624 (EXTRACT_SUBREG (v8i64
6626 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6627 imm:$src2)), sub_xmm)>;
6628 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
6629 (EXTRACT_SUBREG (v8i64
6631 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6632 imm:$src2)), sub_ymm)>;
6634 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
6635 (EXTRACT_SUBREG (v16i32
6637 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6638 imm:$src2)), sub_xmm)>;
6639 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
6640 (EXTRACT_SUBREG (v16i32
6642 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6643 imm:$src2)), sub_ymm)>;
6646 //===-------------------------------------------------------------------===//
6647 // 1-src variable permutation VPERMW/D/Q
6648 //===-------------------------------------------------------------------===//
// 1-src variable permute (VPERMD/Q/PS/PD). Reuses the var-shift multiclasses
// since the operand shapes match. Only 512- and 256-bit forms exist: a
// cross-lane permute is meaningless at 128 bits.
6650 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6651 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6652 let Predicates = [HasAVX512] in
6653 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6654 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6656 let Predicates = [HasAVX512, HasVLX] in
6657 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6658 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-control permute (VPERMQ/VPERMPD $imm). Reuses the shift-by-
// immediate multiclasses; 512- and 256-bit widths only.
6661 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6662 string OpcodeStr, SDNode OpNode,
6663 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6664 let Predicates = [HasAVX512] in
6665 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6666 sched, VTInfo.info512>,
6667 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6668 sched, VTInfo.info512>, EVEX_V512;
6669 let Predicates = [HasAVX512, HasVLX] in
6670 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6671 sched, VTInfo.info256>,
6672 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6673 sched, VTInfo.info256>, EVEX_V256;
// Byte/word variable permute (VPERMB/VPERMW). prd selects the required
// feature (VBMI for bytes, BWI for words); all three widths exist here.
6676 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6677 Predicate prd, SDNode OpNode,
6678 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6679 let Predicates = [prd] in
6680 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6682 let Predicates = [HasVLX, prd] in {
6683 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6685 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// VPERMW and VPERMB share opcode 0x8D, distinguished by VEX.W; likewise
// VPERMQ/VPERMD share 0x36 and VPERMPD/VPERMPS share 0x16.
6690 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6691 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6692 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6693 WriteVarShuffle256, avx512vl_i8_info>;
6695 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6696 WriteVarShuffle256, avx512vl_i32_info>;
6697 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6698 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6699 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6700 WriteFVarShuffle256, avx512vl_f32_info>;
6701 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6702 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
// Immediate-control forms (VPERMQ/VPERMPD with an 8-bit selector).
6704 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6705 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6706 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6707 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6708 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6709 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6711 //===----------------------------------------------------------------------===//
6712 // AVX-512 - VPERMIL
6713 //===----------------------------------------------------------------------===//
// VPERMILPS/PD with a vector control operand. Ctrl is the integer VT info
// for the control vector (may differ in element type from the data VT).
// rr/rm/rmb (broadcast control) forms for one width.
6715 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6716 X86FoldableSchedWrite sched, X86VectorVTInfo _,
6717 X86VectorVTInfo Ctrl> {
6718 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6719 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6720 "$src2, $src1", "$src1, $src2",
6721 (_.VT (OpNode _.RC:$src1,
6722 (Ctrl.VT Ctrl.RC:$src2)))>,
6723 T8PD, EVEX_4V, Sched<[sched]>;
6724 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6725 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6726 "$src2, $src1", "$src1, $src2",
6729 (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6730 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6731 Sched<[sched.Folded, sched.ReadAfterFold]>;
6732 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6733 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6734 "${src2}"##_.BroadcastStr##", $src1",
6735 "$src1, ${src2}"##_.BroadcastStr,
6738 (Ctrl.VT (X86VBroadcast
6739 (Ctrl.ScalarLdFrag addr:$src2)))))>,
6740 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6741 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate the variable-control VPERMIL at all three widths using the
// X86VPermilpv node; 128/256-bit forms require VLX.
6744 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6745 X86SchedWriteWidths sched,
6746 AVX512VLVectorVTInfo _,
6747 AVX512VLVectorVTInfo Ctrl> {
6748 let Predicates = [HasAVX512] in {
6749 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6750 _.info512, Ctrl.info512>, EVEX_V512;
6752 let Predicates = [HasAVX512, HasVLX] in {
6753 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6754 _.info128, Ctrl.info128>, EVEX_V128;
6755 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6756 _.info256, Ctrl.info256>, EVEX_V256;
// Combine the two VPERMIL flavors: variable control (OpcVar) and immediate
// control (OpcImm, reusing the shift-by-immediate machinery with
// X86VPermilpi).
6760 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6761 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6762 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6764 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6765 X86VPermilpi, SchedWriteFShuffle, _>,
6766 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// PS uses an i32 control vector, PD an i64 one (with VEX.W1X encoding).
6769 let ExeDomain = SSEPackedSingle in
6770 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6772 let ExeDomain = SSEPackedDouble in
6773 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6774 avx512vl_i64_info>, VEX_W1X;
6776 //===----------------------------------------------------------------------===//
6777 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6778 //===----------------------------------------------------------------------===//
// VPSHUFD/VPSHUFHW/VPSHUFLW reuse the shift-by-immediate multiclasses;
// the high/low word shuffles differ only in their mandatory prefix
// (XS vs. XD) on the shared opcode 0x70.
6780 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6781 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6782 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6783 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6784 X86PShufhw, SchedWriteShuffle>,
6785 EVEX, AVX512XSIi8Base;
6786 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6787 X86PShuflw, SchedWriteShuffle>,
6788 EVEX, AVX512XDIi8Base;
6790 //===----------------------------------------------------------------------===//
6791 // AVX-512 - VPSHUFB
6792 //===----------------------------------------------------------------------===//
// VPSHUFB: byte shuffle with a vector control operand; reuses the
// variable-shift multiclass (same operand shape). BWI-gated, with
// 128/256-bit widths additionally requiring VLX.
6794 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6795 X86SchedWriteWidths sched> {
6796 let Predicates = [HasBWI] in
6797 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6800 let Predicates = [HasVLX, HasBWI] in {
6801 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6803 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6808 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6809 SchedWriteVarShuffle>, VEX_WIG;
6811 //===----------------------------------------------------------------------===//
6812 // Move Low to High and High to Low packed FP Instructions
6813 //===----------------------------------------------------------------------===//
// EVEX-encoded MOVLHPS/MOVHLPS register-register forms.
6815 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6816 (ins VR128X:$src1, VR128X:$src2),
6817 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6818 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6819 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
// NOTE(review): the isCommutable below applies to VMOVHLPSZrr;
// NotMemoryFoldable prevents the register form from being folded to the
// (differently-behaving) memory form.
6820 let isCommutable = 1 in
6821 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6822 (ins VR128X:$src1, VR128X:$src2),
6823 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6824 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6825 Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6827 //===----------------------------------------------------------------------===//
6828 // VMOVHPS/PD VMOVLPS Instructions
6829 // All patterns were taken from the SSE implementation.
6830 //===----------------------------------------------------------------------===//
// MOVHPS/MOVHPD/MOVLPS/MOVLPD load form: merges a 64-bit memory value into
// one half of an XMM register. OpNode may be null_frag when selection is
// done via explicit patterns elsewhere (see the defms below).
6832 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6833 SDPatternOperator OpNode,
6834 X86VectorVTInfo _> {
6835 let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6836 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6837 (ins _.RC:$src1, f64mem:$src2),
6838 !strconcat(OpcodeStr,
6839 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6843 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6844 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6847 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6848 // SSE1. And MOVLPS pattern is even more complex.
6849 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6850 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6851 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6852 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6853 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6854 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6855 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6856 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Select VMOVHPD for an unpcklpd whose second operand is a scalar i64 load
// bitcast to f64 (the integer-load spelling of the same merge).
6858 let Predicates = [HasAVX512] in {
6860 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
6861 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
6862 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: write the high (MOVHP*) or low (MOVLP*) 64 bits of an XMM
// register to memory. The high-half stores are expressed as storing
// element 0 of an unpckh of the source with itself.
6865 let SchedRW = [WriteFStore] in {
6866 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6867 (ins f64mem:$dst, VR128X:$src),
6868 "vmovhps\t{$src, $dst|$dst, $src}",
6869 [(store (f64 (extractelt
6870 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6871 (bc_v2f64 (v4f32 VR128X:$src))),
6872 (iPTR 0))), addr:$dst)]>,
6873 EVEX, EVEX_CD8<32, CD8VT2>;
6874 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6875 (ins f64mem:$dst, VR128X:$src),
6876 "vmovhpd\t{$src, $dst|$dst, $src}",
6877 [(store (f64 (extractelt
6878 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6879 (iPTR 0))), addr:$dst)]>,
6880 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6881 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6882 (ins f64mem:$dst, VR128X:$src),
6883 "vmovlps\t{$src, $dst|$dst, $src}",
6884 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
6885 (iPTR 0))), addr:$dst)]>,
6886 EVEX, EVEX_CD8<32, CD8VT2>;
6887 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6888 (ins f64mem:$dst, VR128X:$src),
6889 "vmovlpd\t{$src, $dst|$dst, $src}",
6890 [(store (f64 (extractelt (v2f64 VR128X:$src),
6891 (iPTR 0))), addr:$dst)]>,
6892 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Store of element 0 after swapping halves (vpermilpd imm=1) is the same
// as storing the high half directly -> select VMOVHPD store.
6895 let Predicates = [HasAVX512] in {
6897 def : Pat<(store (f64 (extractelt
6898 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6899 (iPTR 0))), addr:$dst),
6900 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6902 //===----------------------------------------------------------------------===//
6903 // FMA - Fused Multiply Operations
// FMA 213 form (dst = src2 * src1 + src3) for one vector width:
// r (register), m (memory), mb (broadcast memory). $src1 is tied to $dst.
6906 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6907 X86FoldableSchedWrite sched,
6908 X86VectorVTInfo _, string Suff> {
6909 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6910 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6911 (ins _.RC:$src2, _.RC:$src3),
6912 OpcodeStr, "$src3, $src2", "$src2, $src3",
6913 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6914 AVX512FMA3Base, Sched<[sched]>;
6916 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6917 (ins _.RC:$src2, _.MemOp:$src3),
6918 OpcodeStr, "$src3, $src2", "$src2, $src3",
6919 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6920 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6922 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6923 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6924 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6925 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6927 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6928 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// 213-form FMA with static rounding control (rb): register-only, EVEX.b
// selects the embedded rounding mode carried in $rc.
6932 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6933 X86FoldableSchedWrite sched,
6934 X86VectorVTInfo _, string Suff> {
6935 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6936 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6937 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6938 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6939 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
6940 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiate the 213-form at all widths. Only the 512-bit form gets the
// rounding variant (embedded rounding requires LL=10); 128/256-bit forms
// need VLX.
6943 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6944 SDNode OpNodeRnd, X86SchedWriteWidths sched,
6945 AVX512VLVectorVTInfo _, string Suff> {
6946 let Predicates = [HasAVX512] in {
6947 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.ZMM,
6949 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6951 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6953 let Predicates = [HasVLX, HasAVX512] in {
6954 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.YMM,
6956 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6957 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, sched.XMM,
6959 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Expand a 213-form FMA to PS (f32) and PD (f64) variants, then
// instantiate the whole packed-FMA 213 family.
6963 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6965 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6966 SchedWriteFMA, avx512vl_f32_info, "PS">;
6967 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6968 SchedWriteFMA, avx512vl_f64_info, "PD">,
6972 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6973 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6974 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6975 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6976 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6977 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// 231-form packed FMA: register (r), memory (m) and broadcast-memory (mb)
// variants. Pattern operand order is (src2, src3, src1) per the 231 permutation.
6980 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6981 X86FoldableSchedWrite sched,
6982 X86VectorVTInfo _, string Suff> {
6983 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
// Register-register form; final "1" selects the commutable variant.
6984 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6985 (ins _.RC:$src2, _.RC:$src3),
6986 OpcodeStr, "$src3, $src2", "$src2, $src3",
6987 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1,
6988 vselect, 1>, AVX512FMA3Base, Sched<[sched]>;
// Register-memory form; $src3 is a full-width load.
6990 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6991 (ins _.RC:$src2, _.MemOp:$src3),
6992 OpcodeStr, "$src3, $src2", "$src2, $src3",
6993 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6994 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: scalar memory operand splatted via X86VBroadcast (EVEX.B).
6996 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6997 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6998 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6999 "$src2, ${src3}"##_.BroadcastStr,
7000 (_.VT (OpNode _.RC:$src2,
7001 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
7002 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
7003 Sched<[sched.Folded, sched.ReadAfterFold]>;
// 231-form packed FMA with static rounding control (register-only, EVEX.B +
// EVEX_RC, AVX512RC $rc operand). Mirrors avx512_fma3_213_round above.
7007 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7008 X86FoldableSchedWrite sched,
7009 X86VectorVTInfo _, string Suff> {
7010 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
7011 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7012 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7013 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7014 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
7016 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 231-form packed FMA at ZMM (with rounding variant) under
// HasAVX512 and at YMM/XMM under HasVLX. Parallels avx512_fma3p_213_common.
7019 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7020 SDNode OpNodeRnd, X86SchedWriteWidths sched,
7021 AVX512VLVectorVTInfo _, string Suff> {
7022 let Predicates = [HasAVX512] in {
7023 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.ZMM,
7025 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7027 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7029 let Predicates = [HasVLX, HasAVX512] in {
7030 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.YMM,
7032 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7033 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, sched.XMM,
7035 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Expands the 231-form FMA for both element types: PS (f32) and PD (f64).
7039 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
7040 SDNode OpNodeRnd > {
7041 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
7042 SchedWriteFMA, avx512vl_f32_info, "PS">;
7043 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
7044 SchedWriteFMA, avx512vl_f64_info, "PD">,
// Instantiate all six 231-form packed FMA families (opcodes 0xB6-0xBE).
7048 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
7049 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
7050 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
7051 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
7052 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
7053 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// 132-form packed FMA: register, memory and broadcast-memory variants.
// Register pattern is (src1, src3, src2); the memory/broadcast patterns are
// deliberately written in 312 order (see comments below) so tablegen's
// duplicate-pattern detection does not collide with the 213/231 forms.
7055 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7056 X86FoldableSchedWrite sched,
7057 X86VectorVTInfo _, string Suff> {
7058 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
7059 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7060 (ins _.RC:$src2, _.RC:$src3),
7061 OpcodeStr, "$src3, $src2", "$src2, $src3",
7062 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1, vselect, 1>,
7063 AVX512FMA3Base, Sched<[sched]>;
7065 // Pattern is 312 order so that the load is in a different place from the
7066 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7067 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7068 (ins _.RC:$src2, _.MemOp:$src3),
7069 OpcodeStr, "$src3, $src2", "$src2, $src3",
7070 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
7071 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
7073 // Pattern is 312 order so that the load is in a different place from the
7074 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7075 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7076 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7077 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
7078 "$src2, ${src3}"##_.BroadcastStr,
7079 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
7080 _.RC:$src1, _.RC:$src2)), 1, 0>,
7081 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// 132-form packed FMA with static rounding control (register-only, EVEX.B +
// EVEX_RC). Pattern operand order is (src1, src3, src2).
7085 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
7086 X86FoldableSchedWrite sched,
7087 X86VectorVTInfo _, string Suff> {
7088 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
7089 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7090 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7091 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
7092 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
7094 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Instantiates the 132-form packed FMA at ZMM (with rounding variant) under
// HasAVX512 and at YMM/XMM under HasVLX. Parallels the 213/231 _common classes.
7097 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7098 SDNode OpNodeRnd, X86SchedWriteWidths sched,
7099 AVX512VLVectorVTInfo _, string Suff> {
7100 let Predicates = [HasAVX512] in {
7101 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.ZMM,
7103 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
7105 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7107 let Predicates = [HasVLX, HasAVX512] in {
7108 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.YMM,
7110 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7111 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, sched.XMM,
7113 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Expands the 132-form FMA for both element types: PS (f32) and PD (f64).
7117 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
7118 SDNode OpNodeRnd > {
7119 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
7120 SchedWriteFMA, avx512vl_f32_info, "PS">;
7121 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
7122 SchedWriteFMA, avx512vl_f64_info, "PD">,
// Instantiate all six 132-form packed FMA families (opcodes 0x96-0x9E).
7126 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
7127 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
7128 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
7129 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
7130 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
7131 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA skeleton shared by the 213/231/132 forms. The *_Int variants use
// (null_frag) patterns (selection is done by the caller-supplied patterns);
// the isCodeGenOnly r/m/rb defs carry the RHS_* patterns passed in. When
// MaskOnlyReg is set, the register/rounding patterns are suppressed.
7134 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7135 dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
7136 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
7137 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7138 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
7139 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7140 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>;
7143 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7144 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
7145 "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
7146 AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
// Static-rounding variant (EVEX.B + EVEX_RC with AVX512RC $rc operand).
7148 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7149 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
7150 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
7151 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
// CodeGen-only FRC (plain scalar register) forms that carry the real
// selection patterns supplied by the caller.
7153 let isCodeGenOnly = 1, isCommutable = 1 in {
7154 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7155 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
7156 !strconcat(OpcodeStr,
7157 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7158 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>;
7159 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
7160 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
7161 !strconcat(OpcodeStr,
7162 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7163 [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>;
7165 def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
7166 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
7167 !strconcat(OpcodeStr,
7168 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
7169 !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
7170 Sched<[SchedWriteFMA.Scl]>;
7171 }// isCodeGenOnly = 1
7172 }// Constraints = "$src1 = $dst"
// Builds all three scalar FMA forms (213/231/132) for one element type,
// passing each form's register / memory / rounding RHS pattern into
// avx512_fma3s_common. The trailing 0/1 bit is MaskOnlyReg.
7175 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7176 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
7177 X86VectorVTInfo _, string SUFF> {
7178 let ExeDomain = _.ExeDomain in {
7179 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
7180 // Operands for intrinsic are in 123 order to preserve passthu
7182 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7184 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
7185 (_.ScalarLdFrag addr:$src3)))),
7186 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
7187 _.FRC:$src3, (i32 imm:$rc)))), 0>;
7189 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
7190 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
7192 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
7193 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
7194 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
7195 _.FRC:$src1, (i32 imm:$rc)))), 1>;
7197 // One pattern is 312 order so that the load is in a different place from the
7198 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
7199 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
7200 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
7202 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
7203 _.FRC:$src1, _.FRC:$src2))),
7204 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
7205 _.FRC:$src2, (i32 imm:$rc)))), 1>;
// Top-level scalar FMA driver: instantiates all forms for SS (f32) and
// SD (f64) under HasAVX512, with the appropriate CD8 tuple and VEX_W for SD.
7209 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
7210 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
7211 let Predicates = [HasAVX512] in {
7212 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7213 OpNodeRnd, f32x_info, "SS">,
7214 EVEX_CD8<32, CD8VT1>, VEX_LIG;
7215 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
7216 OpNodeRnd, f64x_info, "SD">,
7217 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Instantiate the four scalar FMA families (213/231/132 opcode triples).
7221 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
7222 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
7223 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
7224 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
// Selection patterns mapping (Move scalar-insert (Op ...)) DAGs onto the
// *_Int scalar FMA instructions, choosing the 213/231/132 form by where the
// pass-through element (extractelt of $src1, lane 0) and any load appear in
// the operand list. Covers: unmasked reg/mem, merge-masked (X86selects with
// pass-through fallback -> *_Intk), zero-masked (ZeroFP fallback -> *_Intkz),
// and the same three maskings for the static-rounding (RndOp -> *_Zrb_*) case.
7226 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
7227 string Suffix, SDNode Move,
7228 X86VectorVTInfo _, PatLeaf ZeroFP> {
7229 let Predicates = [HasAVX512] in {
// ---- Unmasked register forms: 213 and 231. FRC operands are copied into
// VR128X since the _Int instructions operate on full XMM registers.
7230 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7232 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7234 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
7235 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7236 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7238 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7239 (Op _.FRC:$src2, _.FRC:$src3,
7240 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7241 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
7242 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7243 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
// ---- Unmasked memory forms: load position selects 213 / 132 / 231.
7245 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7247 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7248 (_.ScalarLdFrag addr:$src3)))))),
7249 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
7250 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7253 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7254 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7255 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
7256 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
7257 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7260 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7261 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7262 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7263 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
7264 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
// ---- Merge-masked forms (X86selects falls back to the $src1 element).
7267 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7268 (X86selects VK1WM:$mask,
7270 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7272 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7273 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
7274 VR128X:$src1, VK1WM:$mask,
7275 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7276 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7278 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7279 (X86selects VK1WM:$mask,
7281 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7282 (_.ScalarLdFrag addr:$src3)),
7283 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7284 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
7285 VR128X:$src1, VK1WM:$mask,
7286 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7288 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7289 (X86selects VK1WM:$mask,
7290 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7291 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
7292 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7293 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
7294 VR128X:$src1, VK1WM:$mask,
7295 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7297 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7298 (X86selects VK1WM:$mask,
7299 (Op _.FRC:$src2, _.FRC:$src3,
7300 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7301 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7302 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
7303 VR128X:$src1, VK1WM:$mask,
7304 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7305 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7307 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7308 (X86selects VK1WM:$mask,
7309 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7310 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7311 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7312 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
7313 VR128X:$src1, VK1WM:$mask,
7314 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
// ---- Zero-masked forms (X86selects falls back to ZeroFP).
7316 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7317 (X86selects VK1WM:$mask,
7319 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7321 (_.EltVT ZeroFP)))))),
7322 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
7323 VR128X:$src1, VK1WM:$mask,
7324 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7325 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7327 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7328 (X86selects VK1WM:$mask,
7329 (Op _.FRC:$src2, _.FRC:$src3,
7330 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7331 (_.EltVT ZeroFP)))))),
7332 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
7333 VR128X:$src1, VK1WM:$mask,
7334 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7335 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
7337 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7338 (X86selects VK1WM:$mask,
7340 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7341 (_.ScalarLdFrag addr:$src3)),
7342 (_.EltVT ZeroFP)))))),
7343 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
7344 VR128X:$src1, VK1WM:$mask,
7345 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7347 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7348 (X86selects VK1WM:$mask,
7349 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7350 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
7351 (_.EltVT ZeroFP)))))),
7352 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
7353 VR128X:$src1, VK1WM:$mask,
7354 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7356 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7357 (X86selects VK1WM:$mask,
7358 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
7359 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
7360 (_.EltVT ZeroFP)))))),
7361 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
7362 VR128X:$src1, VK1WM:$mask,
7363 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
7365 // Patterns with rounding mode.
7366 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7368 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7369 _.FRC:$src3, (i32 imm:$rc)))))),
7370 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
7371 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7372 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7374 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7375 (RndOp _.FRC:$src2, _.FRC:$src3,
7376 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7378 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
7379 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7380 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7382 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7383 (X86selects VK1WM:$mask,
7385 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7386 _.FRC:$src3, (i32 imm:$rc)),
7387 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7388 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
7389 VR128X:$src1, VK1WM:$mask,
7390 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7391 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7393 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7394 (X86selects VK1WM:$mask,
7395 (RndOp _.FRC:$src2, _.FRC:$src3,
7396 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7398 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
7399 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
7400 VR128X:$src1, VK1WM:$mask,
7401 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7402 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7404 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7405 (X86selects VK1WM:$mask,
7407 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7408 _.FRC:$src3, (i32 imm:$rc)),
7409 (_.EltVT ZeroFP)))))),
7410 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
7411 VR128X:$src1, VK1WM:$mask,
7412 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7413 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
7415 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
7416 (X86selects VK1WM:$mask,
7417 (RndOp _.FRC:$src2, _.FRC:$src3,
7418 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
7420 (_.EltVT ZeroFP)))))),
7421 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
7422 VR128X:$src1, VK1WM:$mask,
7423 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
7424 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), imm:$rc)>;
// Instantiate the scalar FMA selection patterns for each opcode family,
// first for SS (f32 element in v4f32) then for SD (f64 element in v2f64).
7428 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
7429 X86Movss, v4f32x_info, fp32imm0>;
7430 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
7431 X86Movss, v4f32x_info, fp32imm0>;
7432 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
7433 X86Movss, v4f32x_info, fp32imm0>;
7434 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
7435 X86Movss, v4f32x_info, fp32imm0>;
7437 defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
7438 X86Movsd, v2f64x_info, fp64imm0>;
7439 defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
7440 X86Movsd, v2f64x_info, fp64imm0>;
7441 defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SD",
7442 X86Movsd, v2f64x_info, fp64imm0>;
7443 defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SD",
7444 X86Movsd, v2f64x_info, fp64imm0>;
7446 //===----------------------------------------------------------------------===//
7447 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7448 //===----------------------------------------------------------------------===//
// VPMADD52 (IFMA) register / memory / broadcast variants. $src1 is both the
// accumulator input and the destination (tied constraint).
7449 let Constraints = "$src1 = $dst" in {
7450 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7451 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7452 // NOTE: The SDNode have the multiply operands first with the add last.
7453 // This enables commuted load patterns to be autogenerated by tablegen.
7454 let ExeDomain = _.ExeDomain in {
7455 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7456 (ins _.RC:$src2, _.RC:$src3),
7457 OpcodeStr, "$src3, $src2", "$src2, $src3",
7458 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7459 AVX512FMA3Base, Sched<[sched]>;
// Full-width memory operand in the $src3 (second multiplicand) slot.
7461 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7462 (ins _.RC:$src2, _.MemOp:$src3),
7463 OpcodeStr, "$src3, $src2", "$src2, $src3",
7464 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7465 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: scalar memory splatted via X86VBroadcast (EVEX.B).
7467 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7468 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7469 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7470 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7472 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
7474 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7477 } // Constraints = "$src1 = $dst"
// Instantiates VPMADD52 at ZMM under HasIFMA and at YMM/XMM under
// HasVLX+HasIFMA.
7479 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7480 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7481 let Predicates = [HasIFMA] in {
7482 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7483 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7485 let Predicates = [HasVLX, HasIFMA] in {
7486 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7487 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7488 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7489 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Instantiate the low (0xB4) and high (0xB5) 52-bit multiply-add families.
7493 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7494 SchedWriteVecIMul, avx512vl_i64_info>,
7496 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7497 SchedWriteVecIMul, avx512vl_i64_info>,
7500 //===----------------------------------------------------------------------===//
7501 // AVX-512 Scalar convert from sign integer to float/double
7502 //===----------------------------------------------------------------------===//
// Scalar int->fp convert (cvtsi2ss/sd style): pattern-less rr/rm forms on the
// scalar FRC register class, plus isCodeGenOnly rr_Int/rm_Int forms on the
// full vector RC that carry the OpNode patterns (current rounding mode).
7504 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched,
7505 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7506 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7507 let hasSideEffects = 0 in {
7508 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7509 (ins DstVT.FRC:$src1, SrcRC:$src),
7510 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7511 EVEX_4V, Sched<[sched]>;
7513 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7514 (ins DstVT.FRC:$src1, x86memop:$src),
7515 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7516 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7517 } // hasSideEffects = 0
7518 let isCodeGenOnly = 1 in {
7519 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7520 (ins DstVT.RC:$src1, SrcRC:$src2),
7521 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7522 [(set DstVT.RC:$dst,
7523 (OpNode (DstVT.VT DstVT.RC:$src1),
// FROUND_CURRENT: use the current MXCSR rounding mode.
7525 (i32 FROUND_CURRENT)))]>,
7526 EVEX_4V, Sched<[sched]>;
7528 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7529 (ins DstVT.RC:$src1, x86memop:$src2),
7530 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7531 [(set DstVT.RC:$dst,
7532 (OpNode (DstVT.VT DstVT.RC:$src1),
7533 (ld_frag addr:$src2),
7534 (i32 FROUND_CURRENT)))]>,
7535 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7536 }//isCodeGenOnly = 1
// Static-rounding variant of the scalar int->fp convert: register-only
// rrb_Int form with an AVX512RC $rc operand (EVEX.B + EVEX_RC).
7539 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7540 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7541 X86VectorVTInfo DstVT, string asm> {
7542 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7543 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7545 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7546 [(set DstVT.RC:$dst,
7547 (OpNode (DstVT.VT DstVT.RC:$src1),
7550 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Combines the plain and static-rounding scalar int->fp convert variants
// under one name, tagged VEX_LIG (vector length ignored).
7553 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode,
7554 X86FoldableSchedWrite sched,
7555 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7556 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
7557 defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>,
7558 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7559 ld_frag, asm>, VEX_LIG;
// VCVT(U)SI2SS/SD instruction instantiations (signed then unsigned sources,
// 32- and 64-bit GPRs), AT&T-syntax memory aliases, and selection patterns
// mapping generic (s|u)int_to_fp onto them with IMPLICIT_DEF pass-throughs.
7562 let Predicates = [HasAVX512] in {
7563 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR32,
7564 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
7565 XS, EVEX_CD8<32, CD8VT1>;
7566 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SS, GR64,
7567 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
7568 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7569 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR32,
7570 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
7571 XD, EVEX_CD8<32, CD8VT1>;
7572 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2SD, GR64,
7573 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
7574 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less AT&T aliases for the memory forms (size implied by operand).
7576 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7577 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7578 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7579 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
// sint_to_fp from memory: pass-through register is undefined (IMPLICIT_DEF).
7581 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
7582 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7583 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
7584 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7585 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
7586 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7587 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
7588 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
// sint_to_fp from GPR.
7590 def : Pat<(f32 (sint_to_fp GR32:$src)),
7591 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7592 def : Pat<(f32 (sint_to_fp GR64:$src)),
7593 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7594 def : Pat<(f64 (sint_to_fp GR32:$src)),
7595 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7596 def : Pat<(f64 (sint_to_fp GR64:$src)),
7597 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned variants. NOTE(review): VCVTUSI2SDZ uses avx512_vcvtsi (no
// rounding variant) unlike its siblings — presumably because i32->f64 is
// always exact; confirm against the instruction set reference.
7599 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR32,
7600 v4f32x_info, i32mem, loadi32,
7601 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
7602 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SS, GR64,
7603 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
7604 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7605 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR32, v2f64x_info,
7606 i32mem, loadi32, "cvtusi2sd{l}">,
7607 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7608 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2SD, GR64,
7609 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
7610 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7612 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7613 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
7614 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7615 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0, "att">;
// uint_to_fp from memory and from GPR, mirroring the signed patterns above.
7617 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
7618 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7619 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
7620 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7621 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
7622 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7623 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
7624 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7626 def : Pat<(f32 (uint_to_fp GR32:$src)),
7627 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7628 def : Pat<(f32 (uint_to_fp GR64:$src)),
7629 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7630 def : Pat<(f64 (uint_to_fp GR32:$src)),
7631 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7632 def : Pat<(f64 (uint_to_fp GR64:$src)),
7633 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7636 //===----------------------------------------------------------------------===//
7637 // AVX-512 Scalar convert from float/double to integer
7638 //===----------------------------------------------------------------------===//
// Scalar fp->int convert with rounding: rr_Int (current rounding), rrb_Int
// (static rounding via AVX512RC $rc), and rm_Int (memory). Also emits AT&T
// aliases carrying the size suffix (aliasStr). CodeGenOnly gates whether the
// memory form is hidden from the assembler/disassembler.
7640 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7641 X86VectorVTInfo DstVT, SDNode OpNode,
7643 X86FoldableSchedWrite sched, string asm,
7645 bit CodeGenOnly = 1> {
7646 let Predicates = [HasAVX512] in {
7647 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7648 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7649 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7650 EVEX, VEX_LIG, Sched<[sched]>;
7651 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7652 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7653 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
7654 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7656 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7657 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7658 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7659 [(set DstVT.RC:$dst, (OpNode
7660 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src)))]>,
7661 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
// AT&T aliases: "v" # asm # size-suffix for the register forms.
7663 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7664 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7665 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7666 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7667 } // Predicates = [HasAVX512]
// Variant that passes CodeGenOnly=0 (memory form visible to the assembler)
// and additionally emits the memory-form AT&T alias.
7670 multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT,
7671 X86VectorVTInfo DstVT, SDNode OpNode,
7673 X86FoldableSchedWrite sched, string asm,
7675 avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, OpNodeRnd, sched, asm, aliasStr, 0> {
7676 let Predicates = [HasAVX512] in {
7677 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7678 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7679 SrcVT.IntScalarMemOp:$src), 0, "att">;
7680 } // Predicates = [HasAVX512]
7683 // Convert float/double to signed/unsigned int 32/64
// Signed conversions (0x2D) use the base multiclass; unsigned (0x79) use the
// _aliases variant so their memory forms are assembler-visible.
7684 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7685 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7686 XS, EVEX_CD8<32, CD8VT1>;
7687 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7688 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7689 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7690 defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, X86cvts2usi,
7691 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7692 XS, EVEX_CD8<32, CD8VT1>;
7693 defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, X86cvts2usi,
7694 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7695 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7696 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7697 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7698 XD, EVEX_CD8<64, CD8VT1>;
7699 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7700 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7701 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7702 defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, X86cvts2usi,
7703 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7704 XD, EVEX_CD8<64, CD8VT1>;
7705 defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, X86cvts2usi,
7706 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7707 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7709 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7710 // which produce unnecessary vmovs{s,d} instructions
// Each pattern matches the movss/movsd + scalar_to_vector(convert) idiom and
// selects the corresponding VCVT*SI*Z{rr,rm}_Int instruction directly, so no
// separate blend/move of the upper elements is emitted.
7711 let Predicates = [HasAVX512] in {
7712 def : Pat<(v4f32 (X86Movss
7713 (v4f32 VR128X:$dst),
7714 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
7715 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7717 def : Pat<(v4f32 (X86Movss
7718 (v4f32 VR128X:$dst),
7719 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi64 addr:$src))))))),
7720 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7722 def : Pat<(v4f32 (X86Movss
7723 (v4f32 VR128X:$dst),
7724 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
7725 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7727 def : Pat<(v4f32 (X86Movss
7728 (v4f32 VR128X:$dst),
7729 (v4f32 (scalar_to_vector (f32 (sint_to_fp (loadi32 addr:$src))))))),
7730 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7732 def : Pat<(v2f64 (X86Movsd
7733 (v2f64 VR128X:$dst),
7734 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
7735 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7737 def : Pat<(v2f64 (X86Movsd
7738 (v2f64 VR128X:$dst),
7739 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi64 addr:$src))))))),
7740 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7742 def : Pat<(v2f64 (X86Movsd
7743 (v2f64 VR128X:$dst),
7744 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
7745 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7747 def : Pat<(v2f64 (X86Movsd
7748 (v2f64 VR128X:$dst),
7749 (v2f64 (scalar_to_vector (f64 (sint_to_fp (loadi32 addr:$src))))))),
7750 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Unsigned-integer variants of the same idiom (vcvtusi2s{s,d}).
7752 def : Pat<(v4f32 (X86Movss
7753 (v4f32 VR128X:$dst),
7754 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR64:$src)))))),
7755 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7757 def : Pat<(v4f32 (X86Movss
7758 (v4f32 VR128X:$dst),
7759 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi64 addr:$src))))))),
7760 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7762 def : Pat<(v4f32 (X86Movss
7763 (v4f32 VR128X:$dst),
7764 (v4f32 (scalar_to_vector (f32 (uint_to_fp GR32:$src)))))),
7765 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7767 def : Pat<(v4f32 (X86Movss
7768 (v4f32 VR128X:$dst),
7769 (v4f32 (scalar_to_vector (f32 (uint_to_fp (loadi32 addr:$src))))))),
7770 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7772 def : Pat<(v2f64 (X86Movsd
7773 (v2f64 VR128X:$dst),
7774 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR64:$src)))))),
7775 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7777 def : Pat<(v2f64 (X86Movsd
7778 (v2f64 VR128X:$dst),
7779 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi64 addr:$src))))))),
7780 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7782 def : Pat<(v2f64 (X86Movsd
7783 (v2f64 VR128X:$dst),
7784 (v2f64 (scalar_to_vector (f64 (uint_to_fp GR32:$src)))))),
7785 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7787 def : Pat<(v2f64 (X86Movsd
7788 (v2f64 VR128X:$dst),
7789 (v2f64 (scalar_to_vector (f64 (uint_to_fp (loadi32 addr:$src))))))),
7790 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7791 } // Predicates = [HasAVX512]
7793 // Convert float/double to signed/unsigned int 32/64 with truncation
// rr/rm are the codegen-only FRC-register forms (pattern-matched via
// fp_to_sint/fp_to_uint); rr_Int/rrb_Int/rm_Int are the XMM intrinsic forms.
// rrb_Int encodes {sae} (suppress-all-exceptions) via EVEX_B. aliasStr
// carries the AT&T operand-size suffix ("{l}"/"{q}") for the InstAliases.
7794 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7795 X86VectorVTInfo _DstRC, SDNode OpNode,
7796 SDNode OpNodeInt, SDNode OpNodeRnd,
7797 X86FoldableSchedWrite sched, string aliasStr,
7798 bit CodeGenOnly = 1>{
7799 let Predicates = [HasAVX512] in {
7800 let isCodeGenOnly = 1 in {
7801 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7802 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7803 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7804 EVEX, Sched<[sched]>;
7805 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7806 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7807 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7808 EVEX, Sched<[sched.Folded, sched.ReadAfterFold]>;
7811 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7812 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7813 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7814 EVEX, VEX_LIG, Sched<[sched]>;
7815 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7816 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7817 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
7818 (i32 FROUND_NO_EXC)))]>,
7819 EVEX,VEX_LIG , EVEX_B, Sched<[sched]>;
7820 let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in
7821 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7822 (ins _SrcRC.IntScalarMemOp:$src),
7823 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7824 [(set _DstRC.RC:$dst,
7825 (OpNodeInt (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src)))]>,
7826 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7828 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7829 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7830 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7831 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
// Unsigned-destination variant of avx512_cvt_s_all: instantiates it and adds
// an AT&T alias for the rm_Int memory form (the signed variants keep that
// form codegen-only).
7835 multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm,
7836 X86VectorVTInfo _SrcRC,
7837 X86VectorVTInfo _DstRC, SDNode OpNode,
7838 SDNode OpNodeInt, SDNode OpNodeRnd,
7839 X86FoldableSchedWrite sched,
7841 avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeInt, OpNodeRnd, sched,
7843 let Predicates = [HasAVX512] in {
7844 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7845 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7846 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating scalar fp -> int conversions (vcvtt*). Signed destinations use
// avx512_cvt_s_all; unsigned destinations use the _unsigned variant so the
// memory intrinsic form also gets an AT&T alias.
7850 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7851 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
7852 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7853 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7854 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSS2I,
7855 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7856 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7857 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
7858 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7859 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7860 fp_to_sint, X86cvtts2Int, X86cvtts2IntRnd, WriteCvtSD2I,
7861 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7863 defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info,
7864 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
7865 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7866 defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info,
7867 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSS2I,
7868 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7869 defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7870 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
7871 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7872 defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7873 fp_to_uint, X86cvtts2UInt, X86cvtts2UIntRnd, WriteCvtSD2I,
7874 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7876 //===----------------------------------------------------------------------===//
7877 // AVX-512 Convert form float to double and back
7878 //===----------------------------------------------------------------------===//
// Scalar fp <-> fp conversion (vcvtsd2ss / vcvtss2sd). rr_Int/rm_Int are the
// maskable XMM intrinsic forms using FROUND_CURRENT (current MXCSR rounding);
// rr/rm are codegen-only FRC forms with no patterns (hasSideEffects = 0).
7880 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7881 X86VectorVTInfo _Src, SDNode OpNode,
7882 X86FoldableSchedWrite sched> {
7883 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7884 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7885 "$src2, $src1", "$src1, $src2",
7886 (_.VT (OpNode (_.VT _.RC:$src1),
7887 (_Src.VT _Src.RC:$src2),
7888 (i32 FROUND_CURRENT)))>,
7889 EVEX_4V, VEX_LIG, Sched<[sched]>;
7890 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7891 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7892 "$src2, $src1", "$src1, $src2",
7893 (_.VT (OpNode (_.VT _.RC:$src1),
7894 (_Src.VT _Src.ScalarIntMemCPat:$src2),
7895 (i32 FROUND_CURRENT)))>,
7897 Sched<[sched.Folded, sched.ReadAfterFold]>;
7899 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7900 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7901 (ins _.FRC:$src1, _Src.FRC:$src2),
7902 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7903 EVEX_4V, VEX_LIG, Sched<[sched]>;
7905 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7906 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7907 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7908 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7912 // Scalar Conversion with SAE - suppress all exceptions
// Register-only {sae} form; EVEX_B selects the SAE encoding and the pattern
// passes FROUND_NO_EXC to the rounded SDNode.
7913 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7914 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7915 X86FoldableSchedWrite sched> {
7916 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7917 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7918 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7919 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7920 (_Src.VT _Src.RC:$src2),
7921 (i32 FROUND_NO_EXC)))>,
7922 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7925 // Scalar Conversion with rounding control (RC)
// Register-only form taking an explicit AVX512RC rounding-mode operand $rc,
// which is forwarded to the rounded SDNode as an i32 immediate.
7926 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7927 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7928 X86FoldableSchedWrite sched> {
7929 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7930 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7931 "$rc, $src2, $src1", "$src1, $src2, $rc",
7932 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7933 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
7934 EVEX_4V, VEX_LIG, Sched<[sched]>,
// sd -> ss: combines the basic scalar conversion with the explicit
// rounding-control form. VEX_W + EVEX_CD8<64,...> + XD encode the
// double-precision source.
7937 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7938 SDNode OpNodeRnd, X86FoldableSchedWrite sched,
7939 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7940 let Predicates = [HasAVX512] in {
7941 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7942 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7943 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// ss -> sd: widening conversion is exact, so only the SAE form is needed
// (no rounding-control variant). EVEX_CD8<32,...> + XS encode the
// single-precision source.
7947 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
7948 X86FoldableSchedWrite sched,
7949 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7950 let Predicates = [HasAVX512] in {
7951 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7952 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>,
7953 EVEX_CD8<32, CD8VT1>, XS;
// Concrete scalar fp<->fp conversion instructions (opcode 0x5A for both;
// distinguished by the prefix/width bits added in the multiclasses above).
7956 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
7957 X86froundRnd, WriteCvtSD2SS, f64x_info,
7959 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
7960 X86fpextRnd, WriteCvtSS2SD, f32x_info,
// Select the AVX-512 scalar converts for generic fpextend/fpround.
// OptForSize folds the f32 load into the convert; OptForSpeed instead loads
// with VMOVSSZrm and converts register-to-register (avoids a partial-memory
// operand on the convert). The movss/movsd idiom patterns map onto the _Int
// forms so the upper elements of $dst are preserved.
7963 def : Pat<(f64 (fpextend FR32X:$src)),
7964 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7965 Requires<[HasAVX512]>;
7966 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
7967 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7968 Requires<[HasAVX512, OptForSize]>;
7970 def : Pat<(f64 (extloadf32 addr:$src)),
7971 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7972 Requires<[HasAVX512, OptForSize]>;
7974 def : Pat<(f64 (extloadf32 addr:$src)),
7975 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
7976 Requires<[HasAVX512, OptForSpeed]>;
7978 def : Pat<(f32 (fpround FR64X:$src)),
7979 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7980 Requires<[HasAVX512]>;
7982 def : Pat<(v4f32 (X86Movss
7983 (v4f32 VR128X:$dst),
7984 (v4f32 (scalar_to_vector
7985 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7986 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7987 Requires<[HasAVX512]>;
7989 def : Pat<(v2f64 (X86Movsd
7990 (v2f64 VR128X:$dst),
7991 (v2f64 (scalar_to_vector
7992 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7993 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7994 Requires<[HasAVX512]>;
7996 //===----------------------------------------------------------------------===//
7997 // AVX-512 Vector convert from signed/unsigned integer to float/double
7998 // and from float/double to signed/unsigned integer
7999 //===----------------------------------------------------------------------===//
// Generic maskable vector conversion: rr (register), rm (full-vector memory)
// and rmb (broadcast-from-scalar memory, EVEX_B) forms. The defaulted
// template parameters let callers override the broadcast string, mnemonic
// suffix (Alias, e.g. "{x}"/"{y}"), memory operand, and write-mask register
// class for the narrow cases where dest and source widths disagree.
8001 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8002 X86VectorVTInfo _Src, SDNode OpNode,
8003 X86FoldableSchedWrite sched,
8004 string Broadcast = _.BroadcastStr,
8005 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
8006 RegisterClass MaskRC = _.KRCWM> {
8008 defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
8010 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
8011 (ins MaskRC:$mask, _Src.RC:$src),
8012 OpcodeStr, "$src", "$src",
8013 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8014 (vselect MaskRC:$mask,
8015 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
8017 vselect, "$src0 = $dst">,
8018 EVEX, Sched<[sched]>;
8020 defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
8022 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
8023 (ins MaskRC:$mask, MemOp:$src),
8024 OpcodeStr#Alias, "$src", "$src",
8025 (_.VT (OpNode (_Src.VT
8026 (_Src.LdFrag addr:$src)))),
8027 (vselect MaskRC:$mask,
8028 (_.VT (OpNode (_Src.VT
8029 (_Src.LdFrag addr:$src)))),
8031 vselect, "$src0 = $dst">,
8032 EVEX, Sched<[sched.Folded]>;
8034 defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
8035 (ins _Src.ScalarMemOp:$src),
8036 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
8037 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
8039 "${src}"##Broadcast, "${src}"##Broadcast,
8040 (_.VT (OpNode (_Src.VT
8041 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
8043 (vselect MaskRC:$mask,
8048 (_Src.ScalarLdFrag addr:$src))))),
8050 vselect, "$src0 = $dst">,
8051 EVEX, EVEX_B, Sched<[sched.Folded]>;
8053 // Conversion with SAE - suppress all exceptions
// Register-only {sae} form for full-width vector conversions (EVEX_B selects
// the SAE encoding; FROUND_NO_EXC is passed to the rounded SDNode).
8054 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8055 X86VectorVTInfo _Src, SDNode OpNodeRnd,
8056 X86FoldableSchedWrite sched> {
8057 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8058 (ins _Src.RC:$src), OpcodeStr,
8059 "{sae}, $src", "$src, {sae}",
8060 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
8061 (i32 FROUND_NO_EXC)))>,
8062 EVEX, EVEX_B, Sched<[sched]>;
8065 // Conversion with rounding control (RC)
// Register-only form with an explicit AVX512RC rounding-mode operand $rc,
// forwarded to the rounded SDNode as an i32 immediate.
8066 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8067 X86VectorVTInfo _Src, SDNode OpNodeRnd,
8068 X86FoldableSchedWrite sched> {
8069 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8070 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
8071 "$rc, $src", "$src, $rc",
8072 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
8073 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
8076 // Extend Float to Double
// ZMM form adds the SAE variant; the 128-bit form reads only the low two
// f32 elements, so it uses X86vfpext with an explicit f64mem operand and a
// "{1to2}" broadcast string.
8077 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
8078 X86SchedWriteWidths sched> {
8079 let Predicates = [HasAVX512] in {
8080 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
8081 fpextend, sched.ZMM>,
8082 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
8083 X86vfpextRnd, sched.ZMM>, EVEX_V512;
8085 let Predicates = [HasVLX] in {
8086 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
8087 X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
8088 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
8089 sched.YMM>, EVEX_V256;
8093 // Truncate Double to Float
// ZMM form adds the rounding-control variant. The 128-bit form is defined
// with null_frag (its selection is done via explicit Pats below) and an
// "{x}" mnemonic suffix; the "x"/"y" InstAliases disambiguate the 128- vs
// 256-bit source in assembly.
8094 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
8095 let Predicates = [HasAVX512] in {
8096 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched.ZMM>,
8097 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
8098 X86vfproundRnd, sched.ZMM>, EVEX_V512;
8100 let Predicates = [HasVLX] in {
8101 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
8102 null_frag, sched.XMM, "{1to2}", "{x}", f128mem, VK2WM>,
8104 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
8105 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8107 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8108 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
8109 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8110 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
8111 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8112 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
8113 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8114 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
// Instantiate pd<->ps conversions, then add the selection patterns that the
// null_frag 128-bit cvtpd2ps instructions rely on, including masked
// (X86vmfpround), load, and broadcast-load variants.
8118 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
8119 VEX_W, PD, EVEX_CD8<64, CD8VF>;
8120 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
8121 PS, EVEX_CD8<32, CD8VH>;
8123 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
8124 (VCVTPS2PDZrm addr:$src)>;
8126 let Predicates = [HasVLX] in {
8127 def : Pat<(X86vzmovl (v2f64 (bitconvert
8128 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
8129 (VCVTPD2PSZ128rr VR128X:$src)>;
8130 def : Pat<(X86vzmovl (v2f64 (bitconvert
8131 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
8132 (VCVTPD2PSZ128rm addr:$src)>;
8133 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
8134 (VCVTPS2PDZ128rm addr:$src)>;
8135 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
8136 (VCVTPS2PDZ256rm addr:$src)>;
8138 // Special patterns to allow use of X86vmfpround for masking. Instruction
8139 // patterns have been disabled with null_frag.
8140 def : Pat<(X86vfpround (v2f64 VR128X:$src)),
8141 (VCVTPD2PSZ128rr VR128X:$src)>;
8142 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
8144 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8145 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8147 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8149 def : Pat<(X86vfpround (loadv2f64 addr:$src)),
8150 (VCVTPD2PSZ128rm addr:$src)>;
8151 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
8153 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8154 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
8156 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8158 def : Pat<(X86vfpround (v2f64 (X86VBroadcast (loadf64 addr:$src)))),
8159 (VCVTPD2PSZ128rmb addr:$src)>;
8160 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8161 (v4f32 VR128X:$src0), VK2WM:$mask),
8162 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8163 def : Pat<(X86vmfpround (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8164 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8165 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8168 // Convert Signed/Unsigned Doubleword to Double
// Widening i32 -> f64 is exact, so no rounding variants are needed. The
// 128-bit form consumes only the low two i32 elements, hence the separate
// OpNode128, i64mem operand, and "{1to2}" broadcast string.
8169 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8170 SDNode OpNode128, X86SchedWriteWidths sched> {
8171 // No rounding in this op
8172 let Predicates = [HasAVX512] in
8173 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
8174 sched.ZMM>, EVEX_V512;
8176 let Predicates = [HasVLX] in {
8177 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
8178 OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
8179 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
8180 sched.YMM>, EVEX_V256;
8184 // Convert Signed/Unsigned Doubleword to Float
// Same-width i32 -> f32 conversion; the ZMM form adds the explicit
// rounding-control variant.
8185 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8186 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8187 let Predicates = [HasAVX512] in
8188 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
8190 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
8191 OpNodeRnd, sched.ZMM>, EVEX_V512;
8193 let Predicates = [HasVLX] in {
8194 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
8195 sched.XMM>, EVEX_V128;
8196 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
8197 sched.YMM>, EVEX_V256;
8201 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions take an SAE variant (not RC) on the ZMM form.
8202 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8203 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8204 let Predicates = [HasAVX512] in {
8205 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8207 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
8208 OpNodeRnd, sched.ZMM>, EVEX_V512;
8210 let Predicates = [HasVLX] in {
8211 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8212 sched.XMM>, EVEX_V128;
8213 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8214 sched.YMM>, EVEX_V256;
8218 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating conversions take a rounding-control variant on the ZMM form.
8219 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8220 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8221 let Predicates = [HasAVX512] in {
8222 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
8224 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
8225 OpNodeRnd, sched.ZMM>, EVEX_V512;
8227 let Predicates = [HasVLX] in {
8228 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
8229 sched.XMM>, EVEX_V128;
8230 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
8231 sched.YMM>, EVEX_V256;
8235 // Convert Double to Signed/Unsigned Doubleword with truncation
8236 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8237 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8238 let Predicates = [HasAVX512] in {
8239 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8241 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
8242 OpNodeRnd, sched.ZMM>, EVEX_V512;
8244 let Predicates = [HasVLX] in {
8245 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8246 // memory forms of these instructions in Asm Parser. They have the same
8247 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8248 // due to the same reason.
8249 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8250 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8252 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8253 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8255 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8256 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
8257 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8258 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
8259 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8260 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
8261 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8262 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
8266 // Convert Double to Signed/Unsigned Doubleword
8267 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8268 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8269 let Predicates = [HasAVX512] in {
8270 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
8272 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
8273 OpNodeRnd, sched.ZMM>, EVEX_V512;
8275 let Predicates = [HasVLX] in {
8276 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8277 // memory forms of these instructions in Asm Parser. They have the same
8278 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
8279 // due to the same reason.
8280 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
8281 null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
8283 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
8284 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
8286 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8287 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
8288 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8289 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0, "intel">;
8290 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8291 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
8292 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8293 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0, "intel">;
8297 // Convert Double to Signed/Unsigned Quadword
// 64-bit integer vector conversions require AVX512DQ (HasDQI).
8298 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8299 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8300 let Predicates = [HasDQI] in {
8301 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8303 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8304 OpNodeRnd, sched.ZMM>, EVEX_V512;
8306 let Predicates = [HasDQI, HasVLX] in {
8307 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8308 sched.XMM>, EVEX_V128;
8309 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8310 sched.YMM>, EVEX_V256;
8314 // Convert Double to Signed/Unsigned Quadword with truncation
// Truncating variant of avx512_cvtpd2qq: takes SAE instead of rounding
// control on the ZMM form. Requires AVX512DQ (HasDQI).
8315 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8316 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8317 let Predicates = [HasDQI] in {
8318 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8320 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8321 OpNodeRnd, sched.ZMM>, EVEX_V512;
8323 let Predicates = [HasDQI, HasVLX] in {
8324 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8325 sched.XMM>, EVEX_V128;
8326 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8327 sched.YMM>, EVEX_V256;
8331 // Convert Signed/Unsigned Quadword to Double
// Requires AVX512DQ. The VLX forms are marked NotEVEX2VEXConvertible because
// there is no VEX-encoded equivalent to shrink to.
8332 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8333 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8334 let Predicates = [HasDQI] in {
8335 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8337 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8338 OpNodeRnd, sched.ZMM>, EVEX_V512;
8340 let Predicates = [HasDQI, HasVLX] in {
8341 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8342 sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8343 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8344 sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8348 // Convert Float to Signed/Unsigned Quadword
// Requires AVX512DQ. The 128-bit form widens: it reads only the low two f32
// elements, hence the f64mem operand and explicit "{1to2}" broadcast string.
8349 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8350 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8351 let Predicates = [HasDQI] in {
8352 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8354 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8355 OpNodeRnd, sched.ZMM>, EVEX_V512;
8357 let Predicates = [HasDQI, HasVLX] in {
8358 // Explicitly specified broadcast string, since we take only 2 elements
8359 // from v4f32x_info source
8360 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8361 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
8362 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8363 sched.YMM>, EVEX_V256;
8367 // Convert Float to Signed/Unsigned Quadword with truncation
// Truncating variant of avx512_cvtps2qq: SAE on the ZMM form instead of
// rounding control. Requires AVX512DQ.
8368 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8369 SDNode OpNodeRnd, X86SchedWriteWidths sched> {
8370 let Predicates = [HasDQI] in {
8371 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched.ZMM>,
8372 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8373 OpNodeRnd, sched.ZMM>, EVEX_V512;
8375 let Predicates = [HasDQI, HasVLX] in {
8376 // Explicitly specified broadcast string, since we take only 2 elements
8377 // from v4f32x_info source
8378 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8379 sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
8380 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8381 sched.YMM>, EVEX_V256;
8385 // Convert Signed/Unsigned Quadword to Float
// Narrowing i64 -> f32; the 128-bit form produces only two results, so it
// takes a separate OpNode128 and "x"/"y" mnemonic suffixes. Requires
// AVX512DQ; VLX forms are NotEVEX2VEXConvertible.
8386 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8387 SDNode OpNode128, SDNode OpNodeRnd,
8388 X86SchedWriteWidths sched> {
8389 let Predicates = [HasDQI] in {
8390 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8392 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8393 OpNodeRnd, sched.ZMM>, EVEX_V512;
8395 let Predicates = [HasDQI, HasVLX] in {
8396 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8397 // memory forms of these instructions in Asm Parser. They have the same
8398 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8399 // due to the same reason.
8400 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
8401 sched.XMM, "{1to2}", "{x}">, EVEX_V128,
8402 NotEVEX2VEXConvertible;
8403 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8404 sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8405 NotEVEX2VEXConvertible;
8407 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8408 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
8409 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
8410 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0, "intel">;
8411 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8412 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
8413 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
8414 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0, "intel">;
// Instantiate all packed int <-> fp conversion families. Opcode/prefix pairs
// follow the encodings in the Intel SDM; 64-bit-integer variants (QQ) live in
// the *2qq/*qq2* multiclasses and are gated on AVX512DQ there.
8418 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
8419 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8421 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
8422 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8423 PS, EVEX_CD8<32, CD8VF>;
8425 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86cvttp2si,
8426 X86cvttp2siRnd, SchedWriteCvtPS2DQ>,
8427 XS, EVEX_CD8<32, CD8VF>;
8429 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86cvttp2si,
8430 X86cvttp2siRnd, SchedWriteCvtPD2DQ>,
8431 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8433 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86cvttp2ui,
8434 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PS,
8435 EVEX_CD8<32, CD8VF>;
8437 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86cvttp2ui,
8438 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>,
8439 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8441 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
8442 X86VUintToFP, SchedWriteCvtDQ2PD>, XS,
8443 EVEX_CD8<32, CD8VH>;
8445 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
8446 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, XD,
8447 EVEX_CD8<32, CD8VF>;
8449 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
8450 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8451 EVEX_CD8<32, CD8VF>;
8453 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
8454 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8455 VEX_W, EVEX_CD8<64, CD8VF>;
8457 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
8458 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8459 PS, EVEX_CD8<32, CD8VF>;
8461 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
8462 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8463 PS, EVEX_CD8<64, CD8VF>;
8465 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
8466 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8467 PD, EVEX_CD8<64, CD8VF>;
8469 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
8470 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8471 EVEX_CD8<32, CD8VH>;
8473 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
8474 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8475 PD, EVEX_CD8<64, CD8VF>;
8477 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
8478 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8479 EVEX_CD8<32, CD8VH>;
8481 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86cvttp2si,
8482 X86cvttp2siRnd, SchedWriteCvtPD2DQ>, VEX_W,
8483 PD, EVEX_CD8<64, CD8VF>;
8485 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86cvttp2si,
8486 X86cvttp2siRnd, SchedWriteCvtPS2DQ>, PD,
8487 EVEX_CD8<32, CD8VH>;
8489 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86cvttp2ui,
8490 X86cvttp2uiRnd, SchedWriteCvtPD2DQ>, VEX_W,
8491 PD, EVEX_CD8<64, CD8VF>;
8493 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86cvttp2ui,
8494 X86cvttp2uiRnd, SchedWriteCvtPS2DQ>, PD,
8495 EVEX_CD8<32, CD8VH>;
8497 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
8498 X86VSintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8499 EVEX_CD8<64, CD8VF>;
8501 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
8502 X86VUintToFpRnd, SchedWriteCvtDQ2PD>, VEX_W, XS,
8503 EVEX_CD8<64, CD8VF>;
8505 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
8506 X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, PS,
8507 EVEX_CD8<64, CD8VF>;
8509 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
8510 X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, XD,
8511 EVEX_CD8<64, CD8VF>;
// Selection patterns mapping generic fp_to_sint/fp_to_uint on 512-bit
// vectors onto the truncating VCVTT* Z (zmm) instructions, for both
// register and full-vector-load operands.
8513 let Predicates = [HasAVX512] in {
8514 def : Pat<(v16i32 (fp_to_sint (v16f32 VR512:$src))),
8515 (VCVTTPS2DQZrr VR512:$src)>;
8516 def : Pat<(v16i32 (fp_to_sint (loadv16f32 addr:$src))),
8517 (VCVTTPS2DQZrm addr:$src)>;
8519 def : Pat<(v16i32 (fp_to_uint (v16f32 VR512:$src))),
8520 (VCVTTPS2UDQZrr VR512:$src)>;
8521 def : Pat<(v16i32 (fp_to_uint (loadv16f32 addr:$src))),
8522 (VCVTTPS2UDQZrm addr:$src)>;
8524 def : Pat<(v8i32 (fp_to_sint (v8f64 VR512:$src))),
8525 (VCVTTPD2DQZrr VR512:$src)>;
8526 def : Pat<(v8i32 (fp_to_sint (loadv8f64 addr:$src))),
8527 (VCVTTPD2DQZrm addr:$src)>;
8529 def : Pat<(v8i32 (fp_to_uint (v8f64 VR512:$src))),
8530 (VCVTTPD2UDQZrr VR512:$src)>;
8531 def : Pat<(v8i32 (fp_to_uint (loadv8f64 addr:$src))),
8532 (VCVTTPD2UDQZrm addr:$src)>;
// VLX (128/256-bit EVEX) selection patterns. First the plain
// fp_to_sint/fp_to_uint -> VCVTT*Z128/Z256 mappings, then the masked-node
// patterns: for the conversions whose result is narrower than the source
// (pd -> dq on 128-bit), the instruction definitions use null_frag, so the
// masked forms (rrk/rrkz/rmk/rmkz/rmbk/rmbkz) must be matched explicitly
// here via the X86m* masked SDNodes.
8535 let Predicates = [HasVLX] in {
8536 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128X:$src))),
8537 (VCVTTPS2DQZ128rr VR128X:$src)>;
8538 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
8539 (VCVTTPS2DQZ128rm addr:$src)>;
8541 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src))),
8542 (VCVTTPS2UDQZ128rr VR128X:$src)>;
8543 def : Pat<(v4i32 (fp_to_uint (loadv4f32 addr:$src))),
8544 (VCVTTPS2UDQZ128rm addr:$src)>;
8546 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256X:$src))),
8547 (VCVTTPS2DQZ256rr VR256X:$src)>;
8548 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
8549 (VCVTTPS2DQZ256rm addr:$src)>;
8551 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src))),
8552 (VCVTTPS2UDQZ256rr VR256X:$src)>;
8553 def : Pat<(v8i32 (fp_to_uint (loadv8f32 addr:$src))),
8554 (VCVTTPS2UDQZ256rm addr:$src)>;
8556 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256X:$src))),
8557 (VCVTTPD2DQZ256rr VR256X:$src)>;
8558 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
8559 (VCVTTPD2DQZ256rm addr:$src)>;
8561 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src))),
8562 (VCVTTPD2UDQZ256rr VR256X:$src)>;
8563 def : Pat<(v4i32 (fp_to_uint (loadv4f64 addr:$src))),
8564 (VCVTTPD2UDQZ256rm addr:$src)>;
8566 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8567 // patterns have been disabled with null_frag.
8568 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8569 (VCVTPD2DQZ128rr VR128X:$src)>;
8570 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8572 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8573 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8575 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8577 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8578 (VCVTPD2DQZ128rm addr:$src)>;
8579 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8581 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8582 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8584 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8586 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8587 (VCVTPD2DQZ128rmb addr:$src)>;
8588 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8589 (v4i32 VR128X:$src0), VK2WM:$mask),
8590 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8591 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8592 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8593 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8595 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8596 // patterns have been disabled with null_frag.
8597 def : Pat<(v4i32 (X86cvttp2si (v2f64 VR128X:$src))),
8598 (VCVTTPD2DQZ128rr VR128X:$src)>;
8599 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8601 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8602 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8604 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8606 def : Pat<(v4i32 (X86cvttp2si (loadv2f64 addr:$src))),
8607 (VCVTTPD2DQZ128rm addr:$src)>;
8608 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8610 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8611 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8613 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8615 def : Pat<(v4i32 (X86cvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8616 (VCVTTPD2DQZ128rmb addr:$src)>;
8617 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8618 (v4i32 VR128X:$src0), VK2WM:$mask),
8619 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8620 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8621 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8622 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8624 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8625 // patterns have been disabled with null_frag.
8626 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8627 (VCVTPD2UDQZ128rr VR128X:$src)>;
8628 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8630 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8631 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8633 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8635 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8636 (VCVTPD2UDQZ128rm addr:$src)>;
8637 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8639 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8640 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8642 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8644 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8645 (VCVTPD2UDQZ128rmb addr:$src)>;
8646 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8647 (v4i32 VR128X:$src0), VK2WM:$mask),
8648 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8649 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8650 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8651 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8653 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8654 // patterns have been disabled with null_frag.
8655 def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
8656 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8657 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8659 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8660 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8662 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8664 def : Pat<(v4i32 (X86cvttp2ui (loadv2f64 addr:$src))),
8665 (VCVTTPD2UDQZ128rm addr:$src)>;
8666 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8668 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8669 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8671 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8673 def : Pat<(v4i32 (X86cvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))))),
8674 (VCVTTPD2UDQZ128rmb addr:$src)>;
8675 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8676 (v4i32 VR128X:$src0), VK2WM:$mask),
8677 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8678 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcast (loadf64 addr:$src))),
8679 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8680 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// AVX512DQ: fp -> i64 truncating conversions. The first block handles the
// 512-bit (zmm) forms; the second adds the 128/256-bit forms, which also
// require VLX.
8683 let Predicates = [HasDQI] in {
8684 def : Pat<(v8i64 (fp_to_sint (v8f32 VR256X:$src))),
8685 (VCVTTPS2QQZrr VR256X:$src)>;
8686 def : Pat<(v8i64 (fp_to_sint (loadv8f32 addr:$src))),
8687 (VCVTTPS2QQZrm addr:$src)>;
8689 def : Pat<(v8i64 (fp_to_uint (v8f32 VR256X:$src))),
8690 (VCVTTPS2UQQZrr VR256X:$src)>;
8691 def : Pat<(v8i64 (fp_to_uint (loadv8f32 addr:$src))),
8692 (VCVTTPS2UQQZrm addr:$src)>;
8694 def : Pat<(v8i64 (fp_to_sint (v8f64 VR512:$src))),
8695 (VCVTTPD2QQZrr VR512:$src)>;
8696 def : Pat<(v8i64 (fp_to_sint (loadv8f64 addr:$src))),
8697 (VCVTTPD2QQZrm addr:$src)>;
8699 def : Pat<(v8i64 (fp_to_uint (v8f64 VR512:$src))),
8700 (VCVTTPD2UQQZrr VR512:$src)>;
8701 def : Pat<(v8i64 (fp_to_uint (loadv8f64 addr:$src))),
8702 (VCVTTPD2UQQZrm addr:$src)>;
// 128/256-bit DQ+VLX forms. Note ps2qq widens: a v4f32 (xmm) source
// produces a v4i64 (ymm) result via the Z256 instruction.
8705 let Predicates = [HasDQI, HasVLX] in {
8706 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src))),
8707 (VCVTTPS2QQZ256rr VR128X:$src)>;
8708 def : Pat<(v4i64 (fp_to_sint (loadv4f32 addr:$src))),
8709 (VCVTTPS2QQZ256rm addr:$src)>;
8711 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src))),
8712 (VCVTTPS2UQQZ256rr VR128X:$src)>;
8713 def : Pat<(v4i64 (fp_to_uint (loadv4f32 addr:$src))),
8714 (VCVTTPS2UQQZ256rm addr:$src)>;
8716 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src))),
8717 (VCVTTPD2QQZ128rr VR128X:$src)>;
8718 def : Pat<(v2i64 (fp_to_sint (loadv2f64 addr:$src))),
8719 (VCVTTPD2QQZ128rm addr:$src)>;
8721 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src))),
8722 (VCVTTPD2UQQZ128rr VR128X:$src)>;
8723 def : Pat<(v2i64 (fp_to_uint (loadv2f64 addr:$src))),
8724 (VCVTTPD2UQQZ128rm addr:$src)>;
8726 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src))),
8727 (VCVTTPD2QQZ256rr VR256X:$src)>;
8728 def : Pat<(v4i64 (fp_to_sint (loadv4f64 addr:$src))),
8729 (VCVTTPD2QQZ256rm addr:$src)>;
8731 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src))),
8732 (VCVTTPD2UQQZrr VR256X:$src)>;
8733 def : Pat<(v4i64 (fp_to_uint (loadv4f64 addr:$src))),
8734 (VCVTTPD2UQQZ256rm addr:$src)>;
// Without VLX, only the 512-bit EVEX forms exist, so 128/256-bit unsigned
// conversions are implemented by widening: insert the narrow source into an
// undef zmm (INSERT_SUBREG of IMPLICIT_DEF), run the Z instruction, and
// extract the low subregister of the result.
8737 let Predicates = [HasAVX512, NoVLX] in {
8738 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
8739 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8740 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8741 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8743 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
8744 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
8745 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
8746 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8748 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
8749 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
8750 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8751 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8753 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
8754 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8755 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8756 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8758 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
8759 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
8760 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8761 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8763 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
8764 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8765 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8766 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8768 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
8769 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
8770 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
8771 VR128X:$src1, sub_xmm)))), sub_xmm)>;
// With VLX: pd -> dq conversions only write the low half of the xmm
// destination and zero the rest, so a following X86vzmovl is redundant and
// can be folded away.
8774 let Predicates = [HasAVX512, HasVLX] in {
8775 def : Pat<(X86vzmovl (v2i64 (bitconvert
8776 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
8777 (VCVTPD2DQZ128rr VR128X:$src)>;
8778 def : Pat<(X86vzmovl (v2i64 (bitconvert
8779 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
8780 (VCVTPD2DQZ128rm addr:$src)>;
8781 def : Pat<(X86vzmovl (v2i64 (bitconvert
8782 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
8783 (VCVTPD2UDQZ128rr VR128X:$src)>;
8784 def : Pat<(X86vzmovl (v2i64 (bitconvert
8785 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
8786 (VCVTTPD2DQZ128rr VR128X:$src)>;
8787 def : Pat<(X86vzmovl (v2i64 (bitconvert
8788 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
8789 (VCVTTPD2DQZ128rm addr:$src)>;
8790 def : Pat<(X86vzmovl (v2i64 (bitconvert
8791 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
8792 (VCVTTPD2UDQZ128rr VR128X:$src)>;
// Fold a 64-bit scalar load (or zero-extending load) feeding the low two
// i32 lanes of a dq -> pd conversion into the rm form.
8794 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8795 (VCVTDQ2PDZ128rm addr:$src)>;
8796 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8797 (VCVTDQ2PDZ128rm addr:$src)>;
8799 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8800 (VCVTUDQ2PDZ128rm addr:$src)>;
8801 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
8802 (VCVTUDQ2PDZ128rm addr:$src)>;
// Memory-folded pd <-> ps conversions for 512-bit vectors, plus the DQ+VLX
// qq -> ps vzmovl folds (the 128-bit qq2ps result occupies only the low
// half of the xmm register and is already zero-extended).
8805 let Predicates = [HasAVX512] in {
8806 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
8807 (VCVTPD2PSZrm addr:$src)>;
8808 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
8809 (VCVTPS2PDZrm addr:$src)>;
8812 let Predicates = [HasDQI, HasVLX] in {
8813 def : Pat<(X86vzmovl (v2f64 (bitconvert
8814 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
8815 (VCVTQQ2PSZ128rr VR128X:$src)>;
8816 def : Pat<(X86vzmovl (v2f64 (bitconvert
8817 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
8818 (VCVTUQQ2PSZ128rr VR128X:$src)>;
// AVX512DQ without VLX: implement 128/256-bit fp <-> i64 conversions by
// widening to the 512-bit instruction and extracting the relevant
// subregister, same technique as the NoVLX block above.
8821 let Predicates = [HasDQI, NoVLX] in {
8822 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
8823 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8824 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8825 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8827 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
8828 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
8829 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8830 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8832 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
8833 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
8834 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8835 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8837 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
8838 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8839 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8840 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8842 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
8843 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
8844 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
8845 VR128X:$src1, sub_xmm)))), sub_ymm)>;
8847 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
8848 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
8849 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
8850 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8852 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
8853 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
8854 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8855 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8857 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
8858 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8859 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8860 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8862 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
8863 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
8864 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8865 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8867 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
8868 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
8869 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8870 VR256X:$src1, sub_ymm)))), sub_xmm)>;
8872 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
8873 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8874 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8875 VR128X:$src1, sub_xmm)))), sub_xmm)>;
8877 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
8878 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
8879 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8880 VR256X:$src1, sub_ymm)))), sub_ymm)>;
8883 //===----------------------------------------------------------------------===//
8884 // Half precision conversion instructions
8885 //===----------------------------------------------------------------------===//
// Half -> single conversion (vcvtph2ps). Emits the masked register (rr) and
// memory (rm) forms via AVX512_maskable; opcode 0x13, T8PD encoding.
// _dest/_src carry the vector-type info, ld_frag selects the load node for
// the memory form.
8887 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8888 X86MemOperand x86memop, PatFrag ld_frag,
8889 X86FoldableSchedWrite sched> {
8890 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8891 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8892 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8893 T8PD, Sched<[sched]>;
8894 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8895 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8896 (X86cvtph2ps (_src.VT
8897 (ld_frag addr:$src)))>,
8898 T8PD, Sched<[sched.Folded]>;
// SAE (suppress-all-exceptions) register form of vcvtph2ps: same opcode but
// with EVEX.b set (EVEX_B) and the "{sae}" operand in the asm string; the
// pattern uses the rounding-node variant with FROUND_NO_EXC.
8901 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8902 X86FoldableSchedWrite sched> {
8903 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8904 (ins _src.RC:$src), "vcvtph2ps",
8905 "{sae}, $src", "$src, {sae}",
8906 (X86cvtph2psRnd (_src.VT _src.RC:$src),
8907 (i32 FROUND_NO_EXC))>,
8908 T8PD, EVEX_B, Sched<[sched]>;
// Instantiations: 512-bit (plus SAE form) under HasAVX512, 128/256-bit under
// HasVLX, followed by patterns that fold a 64-bit scalar load into the
// 128-bit rm form (only 4 halves are consumed, so a qword load suffices).
8911 let Predicates = [HasAVX512] in
8912 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, load,
8914 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8915 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8917 let Predicates = [HasVLX] in {
8918 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8919 load, WriteCvtPH2PSY>, EVEX, EVEX_V256,
8920 EVEX_CD8<32, CD8VH>;
8921 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8922 load, WriteCvtPH2PS>, EVEX, EVEX_V128,
8923 EVEX_CD8<32, CD8VH>;
8925 // Pattern match vcvtph2ps of a scalar i64 load.
8926 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
8927 (VCVTPH2PSZ128rm addr:$src)>;
8928 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
8929 (VCVTPH2PSZ128rm addr:$src)>;
8930 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
8931 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8932 (VCVTPH2PSZ128rm addr:$src)>;
// Single -> half conversion (vcvtps2ph, opcode 0x1D). Defines the plain,
// merge-masked (rrk, tied to $src0 via Constraints) and zero-masked (rrkz)
// register forms with explicit X86[m]cvtps2ph patterns, plus pattern-less
// store forms (mr/mrk) marked mayStore. $src2 is the immediate rounding
// control. mrk is NotMemoryFoldable.
8935 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8936 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8937 let ExeDomain = GenericDomain in {
8938 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8939 (ins _src.RC:$src1, i32u8imm:$src2),
8940 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8941 [(set _dest.RC:$dst,
8942 (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
8944 let Constraints = "$src0 = $dst" in
8945 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8946 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8947 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8948 [(set _dest.RC:$dst,
8949 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8950 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8951 Sched<[RR]>, EVEX_K;
8952 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8953 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8954 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8955 [(set _dest.RC:$dst,
8956 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
8957 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8958 Sched<[RR]>, EVEX_KZ;
8959 let hasSideEffects = 0, mayStore = 1 in {
8960 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8961 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8962 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8964 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8965 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8966 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8967 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
// Assembler-only SAE form of vcvtps2ph (empty pattern, hasSideEffects = 0):
// provides the "{sae}" syntax via AVX512_maskable_in_asm with EVEX.b set.
8972 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8974 let hasSideEffects = 0 in
8975 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8976 (outs _dest.RC:$dst),
8977 (ins _src.RC:$src1, i32u8imm:$src2),
8978 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8979 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
// Instantiations of vcvtps2ph for 512/256/128-bit sources, then store
// patterns. For the 128-bit form only the low 64 bits of the xmm result are
// meaningful, so a scalar f64/i64 extract-and-store is folded into the mr
// instruction; the wider forms store the full 128/256-bit result.
8982 let Predicates = [HasAVX512] in {
8983 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8984 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8985 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8986 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8987 let Predicates = [HasVLX] in {
8988 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8989 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8990 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8991 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8992 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8993 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8996 def : Pat<(store (f64 (extractelt
8997 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
8998 (iPTR 0))), addr:$dst),
8999 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
9000 def : Pat<(store (i64 (extractelt
9001 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
9002 (iPTR 0))), addr:$dst),
9003 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
9004 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
9005 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
9006 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
9007 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
9010 // Patterns for matching conversions from float to half-float and vice versa.
9011 let Predicates = [HasVLX] in {
9012 // Use MXCSR.RC for rounding instead of explicitly specifying the default
9013 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
9014 // configurations we support (the default). However, falling back to MXCSR is
9015 // more consistent with other instructions, which are always controlled by it.
9016 // It's encoded as 0b100.
// Scalar f32 -> f16: convert in a vector register with immediate 4 (use
// MXCSR, see comment above), then move the low dword out and truncate to
// 16 bits.
9017 def : Pat<(fp_to_f16 FR32X:$src),
9018 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (v8i16 (VCVTPS2PHZ128rr
9019 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4))), sub_16bit))>;
// Scalar f16 -> f32: sign-extend the GR16 into a GR32, move into an xmm,
// and convert the low element.
9021 def : Pat<(f16_to_fp GR16:$src),
9022 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
9023 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)))), FR32X)) >;
// Round-trip f32 -> f16 -> f32 stays entirely in vector registers.
9025 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
9026 (f32 (COPY_TO_REGCLASS (v4f32 (VCVTPH2PSZ128rr
9027 (v8i16 (VCVTPS2PHZ128rr
9028 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)), 4)))), FR32X)) >;
9031 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembler-only (empty pattern, hasSideEffects = 0) SAE variant of
// (v)ucomis*/(v)comis*: compares $src1 against $src2 with exceptions
// suppressed (EVEX_B). VEX_LIG: the vector-length bits are ignored.
9032 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
9033 string OpcodeStr, X86FoldableSchedWrite sched> {
9034 let hasSideEffects = 0 in
9035 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
9036 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
9037 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// EVEX-encoded scalar compare instructions writing EFLAGS:
// first the SAE-only forms, then the regular forms reusing the SSE
// sse12_ord_cmp / sse12_ord_cmp_int multiclasses. Opcode 0x2E is the
// unordered (ucomis) compare, 0x2F the ordered (comis) compare.
9040 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9041 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFCom>,
9042 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9043 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFCom>,
9044 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9045 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFCom>,
9046 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
9047 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFCom>,
9048 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
9051 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
9052 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
9053 "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
9054 EVEX_CD8<32, CD8VT1>;
9055 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
9056 "ucomisd", WriteFCom>, PD, EVEX,
9057 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// comis forms have no selection pattern (Pattern = []); they exist for the
// assembler/disassembler and the intrinsic (isCodeGenOnly) variants below.
9058 let Pattern = []<dag> in {
9059 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
9060 "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
9061 EVEX_CD8<32, CD8VT1>;
9062 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
9063 "comisd", WriteFCom>, PD, EVEX,
9064 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic (vector-operand) variants matching X86ucomi/X86comi.
9066 let isCodeGenOnly = 1 in {
9067 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
9068 sse_load_f32, "ucomiss", WriteFCom>, PS, EVEX, VEX_LIG,
9069 EVEX_CD8<32, CD8VT1>;
9070 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
9071 sse_load_f64, "ucomisd", WriteFCom>, PD, EVEX,
9072 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9074 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
9075 sse_load_f32, "comiss", WriteFCom>, PS, EVEX, VEX_LIG,
9076 EVEX_CD8<32, CD8VT1>;
9077 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
9078 sse_load_f64, "comisd", WriteFCom>, PD, EVEX,
9079 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
9083 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal / reciprocal-sqrt approximations:
// masked two-operand (src1 passthrough upper elements, src2 converted)
// register and folded-memory forms.
9084 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9085 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9086 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
9087 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9088 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9089 "$src2, $src1", "$src1, $src2",
9090 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
9091 EVEX_4V, Sched<[sched]>;
9092 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9093 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9094 "$src2, $src1", "$src1, $src2",
9095 (OpNode (_.VT _.RC:$src1),
9096 _.ScalarIntMemCPat:$src2)>, EVEX_4V,
9097 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar instantiations: 0x4D = rcp14, 0x4F = rsqrt14; VEX_W selects the
// 64-bit (sd) element size.
9101 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
9102 f32x_info>, EVEX_CD8<32, CD8VT1>,
9104 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
9105 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
9107 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
9108 SchedWriteFRsqrt.Scl, f32x_info>,
9109 EVEX_CD8<32, CD8VT1>, T8PD;
9110 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
9111 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
9112 EVEX_CD8<64, CD8VT1>, T8PD;
9114 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximation ops: masked register (r), full
// memory (m) and broadcast-memory (mb, EVEX_B) forms.
9115 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
9116 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9117 let ExeDomain = _.ExeDomain in {
9118 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9119 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9120 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
9122 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9123 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9125 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
9126 Sched<[sched.Folded, sched.ReadAfterFold]>;
9127 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9128 (ins _.ScalarMemOp:$src), OpcodeStr,
9129 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
9131 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
9132 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_fp14_p for both element widths at 512-bit, plus the
// 128/256-bit forms gated on HasVLX. Immediately used below for
// vrsqrt14 (0x4E) and vrcp14 (0x4C).
9136 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
9137 X86SchedWriteWidths sched> {
9138 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
9139 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
9140 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
9141 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
9143 // Define only if AVX512VL feature is present.
9144 let Predicates = [HasVLX] in {
9145 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
9146 OpNode, sched.XMM, v4f32x_info>,
9147 EVEX_V128, EVEX_CD8<32, CD8VF>;
9148 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
9149 OpNode, sched.YMM, v8f32x_info>,
9150 EVEX_V256, EVEX_CD8<32, CD8VF>;
9151 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
9152 OpNode, sched.XMM, v2f64x_info>,
9153 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
9154 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
9155 OpNode, sched.YMM, v4f64x_info>,
9156 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
9160 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
9161 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
9163 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision (ERI) approximation ops. Three forms: register
// with the current rounding mode (FROUND_CURRENT), register with SAE
// (EVEX_B + FROUND_NO_EXC), and folded scalar-memory. OpNode takes the
// rounding-control i32 as its last operand.
9164 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
9165 SDNode OpNode, X86FoldableSchedWrite sched> {
9166 let ExeDomain = _.ExeDomain in {
9167 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9168 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9169 "$src2, $src1", "$src1, $src2",
9170 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9171 (i32 FROUND_CURRENT))>,
9174 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9175 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9176 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
9177 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9178 (i32 FROUND_NO_EXC))>, EVEX_B,
9181 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9182 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9183 "$src2, $src1", "$src1, $src2",
9184 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
9185 (i32 FROUND_CURRENT))>,
9186 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Wraps avx512_fp28_s for both scalar element sizes (ss/sd), then
// instantiates the ERI scalar ops (vrcp28/vrsqrt28, gated on HasERI) and
// vgetexp (available under plain AVX-512).
9190 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
9191 X86FoldableSchedWrite sched> {
9192 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>,
9193 EVEX_CD8<32, CD8VT1>;
9194 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>,
9195 EVEX_CD8<64, CD8VT1>, VEX_W;
9198 let Predicates = [HasERI] in {
9199 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SchedWriteFRcp.Scl>,
9201 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s,
9202 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
9205 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds,
9206 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
9207 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision op: masked register, full-memory and
// broadcast-memory forms, all using the current rounding mode
// (FROUND_CURRENT). The SAE form lives in avx512_fp28_p_round below.
9209 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9210 SDNode OpNode, X86FoldableSchedWrite sched> {
9211 let ExeDomain = _.ExeDomain in {
9212 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9213 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9214 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>,
9217 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9218 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9220 (bitconvert (_.LdFrag addr:$src))),
9221 (i32 FROUND_CURRENT))>,
9222 Sched<[sched.Folded, sched.ReadAfterFold]>;
9224 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9225 (ins _.ScalarMemOp:$src), OpcodeStr,
9226 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
9228 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
9229 (i32 FROUND_CURRENT))>, EVEX_B,
9230 Sched<[sched.Folded, sched.ReadAfterFold]>;
// SAE register form of the packed 28-bit ops: EVEX.b set, "{sae}" operand,
// FROUND_NO_EXC passed to the node.
9233 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9234 SDNode OpNode, X86FoldableSchedWrite sched> {
9235 let ExeDomain = _.ExeDomain in
9236 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9237 (ins _.RC:$src), OpcodeStr,
9238 "{sae}, $src", "$src, {sae}",
9239 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>,
9240 EVEX_B, Sched<[sched]>;
// Packed ERI helper: 512-bit PS/PD variants, each getting both the
// current-rounding forms (avx512_fp28_p) and the SAE form
// (avx512_fp28_p_round), with the matching CD8 scale.
9243 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
9244 X86SchedWriteWidths sched> {
9245 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9246 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
9247 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
9248 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9249 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
9250 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit (VL) packed unary-FP forms; only defined when AVX512VL is
// available since the narrow EVEX vector lengths require it.
9253 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
9254 SDNode OpNode, X86SchedWriteWidths sched> {
9255 // Define only if AVX512VL feature is present.
9256 let Predicates = [HasVLX] in {
9257 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched.XMM>,
9258 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
9259 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched.YMM>,
9260 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
9261 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched.XMM>,
9262 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
9263 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched.YMM>,
9264 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed ERI instructions (vrsqrt28/vrcp28/vexp2) under HasERI, and vgetexp
// which additionally gets the AVX512VL narrow-width forms.
9268 let Predicates = [HasERI] in {
9269 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SchedWriteFRsqrt>, EVEX;
9270 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SchedWriteFRcp>, EVEX;
9271 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SchedWriteFAdd>, EVEX;
9273 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SchedWriteFRnd>,
9274 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
9275 SchedWriteFRnd>, EVEX;
// Packed sqrt with an explicit embedded rounding-control operand ($rc);
// EVEX_B + EVEX_RC select the static-rounding encoding.
9277 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
9278 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9279 let ExeDomain = _.ExeDomain in
9280 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9281 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
9282 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>,
9283 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed sqrt matching the generic fsqrt node: register, full-vector memory,
// and broadcast-from-scalar forms, each maskable.
9286 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
9287 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
9288 let ExeDomain = _.ExeDomain in {
9289 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9290 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9291 (_.VT (fsqrt _.RC:$src))>, EVEX,
9293 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9294 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
9296 (bitconvert (_.LdFrag addr:$src))))>, EVEX,
9297 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast form: scalar load splatted via X86VBroadcast; EVEX_B marks
// the embedded-broadcast encoding.
9298 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9299 (ins _.ScalarMemOp:$src), OpcodeStr,
9300 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
9302 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
9303 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates packed sqrt for every vector width: 512-bit PS/PD always,
// 128/256-bit forms only when AVX512VL is present.
9307 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
9308 X86SchedWriteSizes sched> {
9309 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9310 sched.PS.ZMM, v16f32_info>,
9311 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9312 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9313 sched.PD.ZMM, v8f64_info>,
9314 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9315 // Define only if AVX512VL feature is present.
9316 let Predicates = [HasVLX] in {
9317 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9318 sched.PS.XMM, v4f32x_info>,
9319 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
9320 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
9321 sched.PS.YMM, v8f32x_info>,
9322 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
9323 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9324 sched.PD.XMM, v2f64x_info>,
9325 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
9326 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
9327 sched.PD.YMM, v4f64x_info>,
9328 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Rounding-control variants of packed sqrt; only the 512-bit forms exist
// since embedded rounding requires the full vector length.
9332 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
9333 X86SchedWriteSizes sched> {
9334 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
9335 sched.PS.ZMM, v16f32_info>,
9336 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
9337 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
9338 sched.PD.ZMM, v8f64_info>,
9339 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: intrinsic (_Int) register/memory/rounding forms using
// X86fsqrtRnds, plus codegen-only FRC register forms (r/m) without patterns,
// and patterns mapping generic scalar fsqrt onto those codegen-only defs.
9342 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9343 X86VectorVTInfo _, string Name> {
9344 let ExeDomain = _.ExeDomain in {
9345 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9346 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
9347 "$src2, $src1", "$src1, $src2",
9348 (X86fsqrtRnds (_.VT _.RC:$src1),
9350 (i32 FROUND_CURRENT))>,
9352 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9353 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
9354 "$src2, $src1", "$src1, $src2",
9355 (X86fsqrtRnds (_.VT _.RC:$src1),
9356 _.ScalarIntMemCPat:$src2,
9357 (i32 FROUND_CURRENT))>,
9358 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Static-rounding form with an explicit $rc operand.
9359 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9360 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
9361 "$rc, $src2, $src1", "$src1, $src2, $rc",
9362 (X86fsqrtRnds (_.VT _.RC:$src1),
9365 EVEX_B, EVEX_RC, Sched<[sched]>;
// Codegen-only forms on the scalar FP register class; no ISel patterns
// here (they are attached below), hence hasSideEffects = 0.
9367 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
9368 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9369 (ins _.FRC:$src1, _.FRC:$src2),
9370 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9373 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9374 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
9375 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9376 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Select plain scalar fsqrt to the register form; the unused first input
// operand is fed an IMPLICIT_DEF.
9380 let Predicates = [HasAVX512] in {
9381 def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
9382 (!cast<Instruction>(Name#Zr)
9383 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// Folding the load is only done when optimizing for size (the folded form
// can be slower/partial-register-hazardous otherwise).
9386 let Predicates = [HasAVX512, OptForSize] in {
9387 def : Pat<(_.EltVT (fsqrt (load addr:$src))),
9388 (!cast<Instruction>(Name#Zm)
9389 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// Instantiates scalar sqrt for SS (f32, XS prefix) and SD (f64, XD + VEX.W).
9393 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9394 X86SchedWriteSizes sched> {
9395 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9396 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9397 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9398 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
// VSQRT: packed forms (plus rounding-control 512-bit forms) and scalar forms.
9401 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
9402 avx512_sqrt_packed_all_round<0x51, "vsqrt", SchedWriteFSqrtSizes>;
9404 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LIG;
// Scalar VRNDSCALE: intrinsic forms (register, SAE, memory) taking an
// immediate rounding-mode selector, codegen-only FRC forms, and patterns
// mapping the generic rounding nodes (ffloor/fceil/ftrunc/frint/fnearbyint)
// onto the codegen-only defs via fixed immediates.
9406 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9407 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9408 let ExeDomain = _.ExeDomain in {
9409 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9410 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9411 "$src3, $src2, $src1", "$src1, $src2, $src3",
9412 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
// SAE variant: {sae} in the asm string, FROUND_NO_EXC in the pattern.
9416 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9417 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9418 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9419 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9420 (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B,
9423 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9424 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9426 "$src3, $src2, $src1", "$src1, $src2, $src3",
9427 (_.VT (X86RndScales _.RC:$src1,
9428 _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
9429 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Codegen-only FRC register/memory forms; the select patterns below
// supply the semantics.
9431 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9432 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9433 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9434 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9435 []>, Sched<[sched]>;
9438 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9439 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9440 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9441 []>, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Immediate encodings: 0x9 = floor, 0xA = ceil, 0xB = trunc,
// 0x4 = current mode (rint), 0xC = nearbyint (no precision exception).
9445 let Predicates = [HasAVX512] in {
9446 def : Pat<(ffloor _.FRC:$src),
9447 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9448 _.FRC:$src, (i32 0x9)))>;
9449 def : Pat<(fceil _.FRC:$src),
9450 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9451 _.FRC:$src, (i32 0xa)))>;
9452 def : Pat<(ftrunc _.FRC:$src),
9453 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9454 _.FRC:$src, (i32 0xb)))>;
9455 def : Pat<(frint _.FRC:$src),
9456 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9457 _.FRC:$src, (i32 0x4)))>;
9458 def : Pat<(fnearbyint _.FRC:$src),
9459 (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
9460 _.FRC:$src, (i32 0xc)))>;
// Load-folding variants, restricted to OptForSize.
9463 let Predicates = [HasAVX512, OptForSize] in {
9464 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
9465 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9466 addr:$src, (i32 0x9)))>;
9467 def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
9468 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9469 addr:$src, (i32 0xa)))>;
9470 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
9471 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9472 addr:$src, (i32 0xb)))>;
9473 def : Pat<(frint (_.ScalarLdFrag addr:$src)),
9474 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9475 addr:$src, (i32 0x4)))>;
9476 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
9477 (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
9478 addr:$src, (i32 0xc)))>;
// VRNDSCALESS/VRNDSCALESD instantiations (f32 / f64 scalar round-scale).
9482 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9483 SchedWriteFRnd.Scl, f32x_info>,
9484 AVX512AIi8Base, EVEX_4V,
9485 EVEX_CD8<32, CD8VT1>;
9487 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9488 SchedWriteFRnd.Scl, f64x_info>,
9489 VEX_W, AVX512AIi8Base, EVEX_4V,
9490 EVEX_CD8<64, CD8VT1>;
// Maps a masked scalar select-of-unary-op DAG (X86selects around OpNode on
// element 0, merged back via a Move node) onto the masked intrinsic
// instruction forms: merge-masking (r_Intk) and zero-masking (r_Intkz).
9492 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9493 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9494 dag OutMask, Predicate BasePredicate> {
9495 let Predicates = [BasePredicate] in {
9496 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9497 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9498 (extractelt _.VT:$dst, (iPTR 0))))),
9499 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9500 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9502 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
9503 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9505 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9506 OutMask, _.VT:$src2, _.VT:$src1)>;
// Masked scalar sqrt patterns for SS/SD; the GR32 mask is truncated and
// copied into VK1WM for the instruction's mask operand.
9510 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9511 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9512 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9513 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9514 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9515 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
// Like avx512_masked_scalar, but for ops that lower to an immediate-carrying
// instruction (e.g. VRNDSCALE); ImmV is the fixed rounding-mode immediate.
9517 multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
9518 X86VectorVTInfo _, PatLeaf ZeroFP,
9519 bits<8> ImmV, Predicate BasePredicate> {
9520 let Predicates = [BasePredicate] in {
9521 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
9522 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9523 (extractelt _.VT:$dst, (iPTR 0))))),
9524 (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
9525 _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
9527 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
9528 (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
9529 (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
9530 VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
// Masked floor/ceil on scalars via VRNDSCALE; immediates 0x01/0x02 select
// round-down / round-up.
9534 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
9535 v4f32x_info, fp32imm0, 0x01, HasAVX512>;
9536 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
9537 v4f32x_info, fp32imm0, 0x02, HasAVX512>;
9538 defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
9539 v2f64x_info, fp64imm0, 0x01, HasAVX512>;
9540 defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
9541 v2f64x_info, fp64imm0, 0x02, HasAVX512>;
9544 //-------------------------------------------------
9545 // Integer truncate and extend operations
9546 //-------------------------------------------------
9548 // PatFrags that contain a select and a truncate op. The take operands in the
9549 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9550 // either to the multiclasses.
// select_trunc: plain truncation merged with $src0 under $mask.
9551 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9552 (vselect node:$mask,
9553 (trunc node:$src), node:$src0)>;
// select_truncs: signed-saturating truncation under mask.
9554 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9555 (vselect node:$mask,
9556 (X86vtruncs node:$src), node:$src0)>;
// select_truncus: unsigned-saturating truncation under mask.
9557 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9558 (vselect node:$mask,
9559 (X86vtruncus node:$src), node:$src0)>;
// Core VPMOV* truncate forms: rr (unmasked), rrk (merge-masked via MaskNode
// with tied $src0), rrkz (zero-masked), plus pattern-less store forms
// mr/mrk whose selection is handled by avx512_trunc_mr_lowering.
9561 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9562 SDPatternOperator MaskNode,
9563 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9564 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9565 let ExeDomain = DestInfo.ExeDomain in {
9566 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9567 (ins SrcInfo.RC:$src),
9568 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9569 [(set DestInfo.RC:$dst,
9570 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9571 EVEX, Sched<[sched]>;
// Merge-masked form; $src0 is tied to $dst for the pass-through lanes.
9572 let Constraints = "$src0 = $dst" in
9573 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9574 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9575 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9576 [(set DestInfo.RC:$dst,
9577 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9578 (DestInfo.VT DestInfo.RC:$src0),
9579 SrcInfo.KRCWM:$mask))]>,
9580 EVEX, EVEX_K, Sched<[sched]>;
9581 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9582 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9583 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9584 [(set DestInfo.RC:$dst,
9585 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9586 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9587 EVEX, EVEX_KZ, Sched<[sched]>;
// Truncating-store forms (destination is memory); no patterns here, so
// they carry mayStore/hasSideEffects explicitly.
9590 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9591 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9592 (ins x86memop:$dst, SrcInfo.RC:$src),
9593 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9594 EVEX, Sched<[sched.Folded]>;
9596 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9597 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9598 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9599 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9600 }//mayStore = 1, hasSideEffects = 0
// Patterns selecting the (masked) truncating-store fragments onto the
// mr/mrk instruction forms defined in avx512_trunc_common.
9603 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9604 X86VectorVTInfo DestInfo,
9605 PatFrag truncFrag, PatFrag mtruncFrag,
9608 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9609 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mr)
9610 addr:$dst, SrcInfo.RC:$src)>;
9612 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9613 SrcInfo.KRCWM:$mask),
9614 (!cast<Instruction>(Name#SrcInfo.ZSuffix##mrk)
9615 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Full VPMOV* family across vector lengths: 128/256-bit forms under
// [HasVLX, prd], 512-bit form under [prd]. Each length takes its own
// op/mask nodes, dest info, and memory operand.
9618 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9619 SDNode OpNode256, SDNode OpNode512,
9620 SDPatternOperator MaskNode128,
9621 SDPatternOperator MaskNode256,
9622 SDPatternOperator MaskNode512,
9623 X86FoldableSchedWrite sched,
9624 AVX512VLVectorVTInfo VTSrcInfo,
9625 X86VectorVTInfo DestInfoZ128,
9626 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9627 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9628 X86MemOperand x86memopZ, PatFrag truncFrag,
9629 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9631 let Predicates = [HasVLX, prd] in {
9632 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9633 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9634 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9635 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9637 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9638 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9639 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9640 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9642 let Predicates = [prd] in
9643 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9644 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9645 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9646 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// qword -> byte truncate (VPMOVQB family). All lengths use the in-vector
// node since the result never fills a full 128-bit register.
9649 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9650 SDPatternOperator MaskNode,
9651 X86FoldableSchedWrite sched, PatFrag StoreNode,
9652 PatFrag MaskedStoreNode, SDNode InVecNode,
9653 SDPatternOperator InVecMaskNode> {
9654 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9655 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9656 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9657 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9658 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// qword -> word truncate (VPMOVQW family). The 512-bit form produces a full
// v8i16 so it uses the plain op/mask nodes; narrower forms use the
// in-vector nodes.
9661 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9662 SDPatternOperator MaskNode,
9663 X86FoldableSchedWrite sched, PatFrag StoreNode,
9664 PatFrag MaskedStoreNode, SDNode InVecNode,
9665 SDPatternOperator InVecMaskNode> {
9666 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9667 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9668 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9669 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9670 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// qword -> dword truncate (VPMOVQD family); only the 128-bit form needs the
// in-vector node.
9673 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9674 SDPatternOperator MaskNode,
9675 X86FoldableSchedWrite sched, PatFrag StoreNode,
9676 PatFrag MaskedStoreNode, SDNode InVecNode,
9677 SDPatternOperator InVecMaskNode> {
9678 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9679 InVecMaskNode, MaskNode, MaskNode, sched,
9680 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9681 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9682 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// dword -> byte truncate (VPMOVDB family).
9685 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9686 SDPatternOperator MaskNode,
9687 X86FoldableSchedWrite sched, PatFrag StoreNode,
9688 PatFrag MaskedStoreNode, SDNode InVecNode,
9689 SDPatternOperator InVecMaskNode> {
9690 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9691 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9692 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9693 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9694 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// dword -> word truncate (VPMOVDW family).
9697 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9698 SDPatternOperator MaskNode,
9699 X86FoldableSchedWrite sched, PatFrag StoreNode,
9700 PatFrag MaskedStoreNode, SDNode InVecNode,
9701 SDPatternOperator InVecMaskNode> {
9702 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9703 InVecMaskNode, MaskNode, MaskNode, sched,
9704 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9705 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9706 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// word -> byte truncate (VPMOVWB family); requires BWI (byte/word ops).
9709 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9710 SDPatternOperator MaskNode,
9711 X86FoldableSchedWrite sched, PatFrag StoreNode,
9712 PatFrag MaskedStoreNode, SDNode InVecNode,
9713 SDPatternOperator InVecMaskNode> {
9714 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9715 InVecMaskNode, MaskNode, MaskNode, sched,
9716 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9717 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9718 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// VPMOV{,S,US}{QB,QW,QD,DB,DW,WB} instantiations. Each triple covers plain
// truncation (trunc), signed saturation (X86vtruncs), and unsigned
// saturation (X86vtruncus), with the matching (masked) truncstore frags.
9721 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9722 WriteShuffle256, truncstorevi8,
9723 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9724 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9725 WriteShuffle256, truncstore_s_vi8,
9726 masked_truncstore_s_vi8, X86vtruncs,
9728 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9729 select_truncus, WriteShuffle256,
9730 truncstore_us_vi8, masked_truncstore_us_vi8,
9731 X86vtruncus, X86vmtruncus>;
9733 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9734 WriteShuffle256, truncstorevi16,
9735 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9736 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9737 WriteShuffle256, truncstore_s_vi16,
9738 masked_truncstore_s_vi16, X86vtruncs,
9740 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9741 select_truncus, WriteShuffle256,
9742 truncstore_us_vi16, masked_truncstore_us_vi16,
9743 X86vtruncus, X86vmtruncus>;
9745 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9746 WriteShuffle256, truncstorevi32,
9747 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9748 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9749 WriteShuffle256, truncstore_s_vi32,
9750 masked_truncstore_s_vi32, X86vtruncs,
9752 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9753 select_truncus, WriteShuffle256,
9754 truncstore_us_vi32, masked_truncstore_us_vi32,
9755 X86vtruncus, X86vmtruncus>;
9757 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9758 WriteShuffle256, truncstorevi8,
9759 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9760 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9761 WriteShuffle256, truncstore_s_vi8,
9762 masked_truncstore_s_vi8, X86vtruncs,
9764 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9765 select_truncus, WriteShuffle256,
9766 truncstore_us_vi8, masked_truncstore_us_vi8,
9767 X86vtruncus, X86vmtruncus>;
9769 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9770 WriteShuffle256, truncstorevi16,
9771 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9772 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9773 WriteShuffle256, truncstore_s_vi16,
9774 masked_truncstore_s_vi16, X86vtruncs,
9776 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9777 select_truncus, WriteShuffle256,
9778 truncstore_us_vi16, masked_truncstore_us_vi16,
9779 X86vtruncus, X86vmtruncus>;
9781 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9782 WriteShuffle256, truncstorevi8,
9783 masked_truncstorevi8, X86vtrunc,
9785 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9786 WriteShuffle256, truncstore_s_vi8,
9787 masked_truncstore_s_vi8, X86vtruncs,
9789 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9790 select_truncus, WriteShuffle256,
9791 truncstore_us_vi8, masked_truncstore_us_vi8,
9792 X86vtruncus, X86vmtruncus>;
// Without VLX, 256-bit truncates are widened: insert the source into a
// 512-bit register, run the Z-form VPMOV, extract the low subvector.
9794 let Predicates = [HasAVX512, NoVLX] in {
9795 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9796 (v8i16 (EXTRACT_SUBREG
9797 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9798 VR256X:$src, sub_ymm)))), sub_xmm))>;
9799 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9800 (v4i32 (EXTRACT_SUBREG
9801 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9802 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for word->byte when BWI is present but VLX is not.
9805 let Predicates = [HasBWI, NoVLX] in {
9806 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9807 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9808 VR256X:$src, sub_ymm))), sub_xmm))>;
9811 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Selects the masked-truncate DAG nodes directly onto the rrk (merge) and
// rrkz (zero) instruction forms of a given VPMOV instruction.
9812 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9813 X86VectorVTInfo DestInfo,
9814 X86VectorVTInfo SrcInfo> {
9815 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9817 SrcInfo.KRCWM:$mask)),
9818 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9819 SrcInfo.KRCWM:$mask,
9822 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9823 DestInfo.ImmAllZerosV,
9824 SrcInfo.KRCWM:$mask)),
9825 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// mtrunc_lowering instantiations: 256-bit dword->word forms under VLX, and
// the 512-bit dword->word/byte and qword->word forms under plain AVX512.
9829 let Predicates = [HasVLX] in {
9830 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9831 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9832 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9835 let Predicates = [HasAVX512] in {
9836 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9837 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9838 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9840 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9841 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9842 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9844 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9845 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9846 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Common register/memory forms for the sign-/zero-extend (VPMOVSX/ZX)
// instructions, both maskable.
9849 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9850 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9851 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9852 let ExeDomain = DestInfo.ExeDomain in {
9853 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9854 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9855 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9856 EVEX, Sched<[sched]>;
9858 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9859 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9860 (DestInfo.VT (LdFrag addr:$src))>,
9861 EVEX, Sched<[sched.Folded]>;
// byte -> word extend (VPMOVSX/ZXBW). The 128-bit form uses the in-vector
// node (only the low half of the source is consumed); ExtTy picks the
// signed/unsigned extload fragment by name.
9865 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9866 SDNode OpNode, SDNode InVecNode, string ExtTy,
9867 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9868 let Predicates = [HasVLX, HasBWI] in {
9869 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9870 v16i8x_info, i64mem, LdFrag, InVecNode>,
9871 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9873 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9874 v16i8x_info, i128mem, LdFrag, OpNode>,
9875 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9877 let Predicates = [HasBWI] in {
9878 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9879 v32i8x_info, i256mem, LdFrag, OpNode>,
9880 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// byte -> dword extend (VPMOVSX/ZXBD); 128/256-bit forms use the in-vector
// node, the 512-bit form the full extend node.
9884 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9885 SDNode OpNode, SDNode InVecNode, string ExtTy,
9886 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9887 let Predicates = [HasVLX, HasAVX512] in {
9888 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9889 v16i8x_info, i32mem, LdFrag, InVecNode>,
9890 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9892 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9893 v16i8x_info, i64mem, LdFrag, InVecNode>,
9894 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9896 let Predicates = [HasAVX512] in {
9897 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9898 v16i8x_info, i128mem, LdFrag, OpNode>,
9899 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// byte -> qword extend (VPMOVSX/ZXBQ); all forms consume fewer than 128 bits
// of source, so every length uses the in-vector node.
9903 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9904 SDNode OpNode, SDNode InVecNode, string ExtTy,
9905 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9906 let Predicates = [HasVLX, HasAVX512] in {
9907 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9908 v16i8x_info, i16mem, LdFrag, InVecNode>,
9909 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9911 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9912 v16i8x_info, i32mem, LdFrag, InVecNode>,
9913 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9915 let Predicates = [HasAVX512] in {
9916 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9917 v16i8x_info, i64mem, LdFrag, InVecNode>,
9918 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// word -> dword extend (VPMOVSX/ZXWD).
9922 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9923 SDNode OpNode, SDNode InVecNode, string ExtTy,
9924 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9925 let Predicates = [HasVLX, HasAVX512] in {
9926 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9927 v8i16x_info, i64mem, LdFrag, InVecNode>,
9928 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9930 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9931 v8i16x_info, i128mem, LdFrag, OpNode>,
9932 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9934 let Predicates = [HasAVX512] in {
9935 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9936 v16i16x_info, i256mem, LdFrag, OpNode>,
9937 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// word -> qword extend (VPMOVSX/ZXWQ).
9941 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9942 SDNode OpNode, SDNode InVecNode, string ExtTy,
9943 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9944 let Predicates = [HasVLX, HasAVX512] in {
9945 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9946 v8i16x_info, i32mem, LdFrag, InVecNode>,
9947 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9949 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9950 v8i16x_info, i64mem, LdFrag, InVecNode>,
9951 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9953 let Predicates = [HasAVX512] in {
9954 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9955 v8i16x_info, i128mem, LdFrag, OpNode>,
9956 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// dword -> qword extend (VPMOVSX/ZXDQ); no VEX_WIG here, unlike the
// byte/word-source variants above.
9960 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9961 SDNode OpNode, SDNode InVecNode, string ExtTy,
9962 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9964 let Predicates = [HasVLX, HasAVX512] in {
9965 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9966 v4i32x_info, i64mem, LdFrag, InVecNode>,
9967 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9969 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9970 v4i32x_info, i128mem, LdFrag, OpNode>,
9971 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9973 let Predicates = [HasAVX512] in {
9974 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9975 v8i32x_info, i256mem, LdFrag, OpNode>,
9976 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// VPMOVZX* (zero-extend) and VPMOVSX* (sign-extend) instantiations; the
// "z"/"s" string selects the matching extload PatFrag family.
9980 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9981 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9982 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9983 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9984 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9985 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9987 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9988 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9989 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9990 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9991 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9992 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9995 // Patterns that we also need any extend versions of. aext_vector_inreg
9996 // is currently legalized to zext_vector_inreg.
// Load-folding patterns shared by sext/zext/anyext: select the memory form
// of the extend instruction when the extended operand is a plain vector load
// (or, for the 256-bit forms, a zero-extended scalar load).
9997 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
// 256-bit byte->word form needs BWI in addition to VLX.
9999 let Predicates = [HasVLX, HasBWI] in {
10000 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
10001 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10002 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
10003 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10004 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
10005 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
10008 let Predicates = [HasVLX] in {
10009 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
10010 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10011 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
10012 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10013 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
10014 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
10016 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
10017 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10018 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
10019 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10020 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
10021 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
10024 // 512-bit patterns
10025 let Predicates = [HasBWI] in {
10026 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
10027 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
10029 let Predicates = [HasAVX512] in {
10030 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
10031 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
10032 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
10033 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
10035 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
10036 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
10038 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
10039 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Anyext-only patterns: in addition to the shared load-folding patterns from
// AVX512_pmovx_patterns_base, select the register form for an anyext of a
// full in-register vector (anyext is free to use either sext or zext forms).
10043 multiclass AVX512_pmovx_patterns_aext<string OpcPrefix, SDNode ExtOp> :
10044 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10045 let Predicates = [HasVLX, HasBWI] in {
10046 def : Pat<(v16i16 (ExtOp (v16i8 VR128X:$src))),
10047 (!cast<I>(OpcPrefix#BWZ256rr) VR128X:$src)>;
10050 let Predicates = [HasVLX] in {
10051 def : Pat<(v8i32 (ExtOp (v8i16 VR128X:$src))),
10052 (!cast<I>(OpcPrefix#WDZ256rr) VR128X:$src)>;
10054 def : Pat<(v4i64 (ExtOp (v4i32 VR128X:$src))),
10055 (!cast<I>(OpcPrefix#DQZ256rr) VR128X:$src)>;
10058 // 512-bit patterns
10059 let Predicates = [HasBWI] in {
10060 def : Pat<(v32i16 (ExtOp (v32i8 VR256X:$src))),
10061 (!cast<I>(OpcPrefix#BWZrr) VR256X:$src)>;
10063 let Predicates = [HasAVX512] in {
10064 def : Pat<(v16i32 (ExtOp (v16i8 VR128X:$src))),
10065 (!cast<I>(OpcPrefix#BDZrr) VR128X:$src)>;
10066 def : Pat<(v16i32 (ExtOp (v16i16 VR256X:$src))),
10067 (!cast<I>(OpcPrefix#WDZrr) VR256X:$src)>;
10069 def : Pat<(v8i64 (ExtOp (v8i16 VR128X:$src))),
10070 (!cast<I>(OpcPrefix#WQZrr) VR128X:$src)>;
10072 def : Pat<(v8i64 (ExtOp (v8i32 VR256X:$src))),
10073 (!cast<I>(OpcPrefix#DQZrr) VR256X:$src)>;
// sext/zext patterns: extends the shared base patterns with the *_invec
// (InVecOp) forms, where only the low elements of the 128-bit source are
// consumed. These fold narrow scalar loads (i16/i32/i64) that were inserted
// into a vector via scalar_to_vector / vzmovl / vzload.
// NOTE(review): the header line declaring the InVecOp parameter is not
// visible in this extraction — confirm against the full file.
10078 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
10080 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
10081 // 128-bit patterns
10082 let Predicates = [HasVLX, HasBWI] in {
10083 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10084 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10085 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10086 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10087 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
10088 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10089 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
10090 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10091 def : Pat<(v8i16 (InVecOp (loadv16i8 addr:$src))),
10092 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
10094 let Predicates = [HasVLX] in {
10095 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10096 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10097 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
10098 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10099 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
10100 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
10101 def : Pat<(v4i32 (InVecOp (loadv16i8 addr:$src))),
10102 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
// B->Q only consumes two bytes, so a 16-bit extload suffices here.
10104 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
10105 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10106 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
10107 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10108 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
10109 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10110 def : Pat<(v2i64 (InVecOp (loadv16i8 addr:$src))),
10111 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
10113 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10114 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10115 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10116 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10117 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
10118 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10119 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
10120 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10121 def : Pat<(v4i32 (InVecOp (loadv8i16 addr:$src))),
10122 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
10124 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10125 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10126 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
10127 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10128 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
10129 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10130 def : Pat<(v2i64 (InVecOp (loadv8i16 addr:$src))),
10131 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
10133 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10134 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10135 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
10136 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10137 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
10138 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10139 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
10140 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
10141 def : Pat<(v2i64 (InVecOp (loadv4i32 addr:$src))),
10142 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit in-vector forms: still only the low half of the source is used.
10144 let Predicates = [HasVLX] in {
10145 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10146 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10147 def : Pat<(v8i32 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
10148 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10149 def : Pat<(v8i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
10150 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10151 def : Pat<(v8i32 (InVecOp (loadv16i8 addr:$src))),
10152 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
10154 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
10155 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10156 def : Pat<(v4i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
10157 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10158 def : Pat<(v4i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
10159 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10160 def : Pat<(v4i64 (InVecOp (loadv16i8 addr:$src))),
10161 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
10163 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10164 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10165 def : Pat<(v4i64 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
10166 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10167 def : Pat<(v4i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
10168 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10169 def : Pat<(v4i64 (InVecOp (loadv8i16 addr:$src))),
10170 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
10172 // 512-bit patterns
10173 let Predicates = [HasAVX512] in {
10174 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
10175 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
10176 def : Pat<(v8i64 (InVecOp (loadv16i8 addr:$src))),
10177 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
// Instantiate the pattern multiclasses: sext and zext get the full in-vector
// pattern set; anyext reuses the zext instructions via the aext-only set.
10181 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
10182 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
10183 defm : AVX512_pmovx_patterns_aext<"VPMOVZX", anyext>;
10185 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10186 // ext+trunc aggressively making it impossible to legalize the DAG to this
10187 // pattern directly.
// Widen to v16i32 with VPMOVZXWD, then use the AVX512F v16i32->v16i8 truncate
// (VPMOVDB) for the register, load, and store forms.
10188 let Predicates = [HasAVX512, NoBWI] in {
10189 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10190 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10191 def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
10192 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10193 def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
10194 (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
// NOTE(review): this entire block (comment and all three patterns) is an
// exact duplicate of the [HasAVX512, NoBWI] block immediately above. The
// duplicated patterns are redundant and one copy should be deleted.
10197 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
10198 // ext+trunc aggressively making it impossible to legalize the DAG to this
10199 // pattern directly.
10200 let Predicates = [HasAVX512, NoBWI] in {
10201 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
10202 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10203 def: Pat<(v16i8 (trunc (bc_v16i16 (loadv4i64 addr:$src)))),
10204 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
10205 def: Pat<(store (v16i8 (trunc (v16i16 VR256X:$src))), addr:$dst),
10206 (VPMOVDBZmr addr:$dst, (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
10209 //===----------------------------------------------------------------------===//
10210 // GATHER - SCATTER Operations
10212 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked gather instruction: result register and mask are tied outputs
// ($mask is consumed and written back as $mask_wb; per-element bits are
// cleared as elements complete). $dst is both source and destination
// (merge semantics), hence the @earlyclobber + tied constraints.
10213 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10214 X86MemOperand memop, PatFrag GatherNode,
10215 RegisterClass MaskRC = _.KRCWM> {
10216 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
10217 ExeDomain = _.ExeDomain in
10218 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
10219 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
10220 !strconcat(OpcodeStr#_.Suffix,
10221 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
10222 [(set _.RC:$dst, MaskRC:$mask_wb,
10223 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
10224 vectoraddr:$src2))]>, EVEX, EVEX_K,
10225 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers with 64-bit data elements (PD/Q): both dword- and qword-indexed
// variants at 512/256/128-bit widths; 128/256-bit forms require VLX.
10228 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
10229 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10230 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
10231 vy512xmem, mgatherv8i32>, EVEX_V512, VEX_W;
10232 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
10233 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
10234 let Predicates = [HasVLX] in {
10235 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
10236 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
10237 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
10238 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
10239 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
10240 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
10241 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
10242 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gathers with 32-bit data elements (PS/D). Qword-indexed forms produce a
// result one step narrower than the index vector (e.g. 8 x i64 indices ->
// 256-bit data), hence the narrower _.infoNNN on the Q variants.
10246 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
10247 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10248 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
10249 mgatherv16i32>, EVEX_V512;
10250 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256mem,
10251 mgatherv8i64>, EVEX_V512;
10252 let Predicates = [HasVLX] in {
10253 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
10254 vy256xmem, mgatherv8i32>, EVEX_V256;
10255 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
10256 vy128xmem, mgatherv4i64>, EVEX_V256;
10257 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
10258 vx128xmem, mgatherv4i32>, EVEX_V128;
10259 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
10260 vx64xmem, mgatherv2i64, VK2WM>,
// Floating-point (VGATHER*PS/PD) and integer (VPGATHER*D/Q) gather families.
10266 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
10267 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
10269 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
10270 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One masked scatter instruction: $mask is consumed and written back as
// $mask_wb (bits cleared as elements are stored); the data vector is stored
// to the vector-indexed address $dst.
10272 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
10273 X86MemOperand memop, PatFrag ScatterNode,
10274 RegisterClass MaskRC = _.KRCWM> {
10276 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
10278 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
10279 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
10280 !strconcat(OpcodeStr#_.Suffix,
10281 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
10282 [(set MaskRC:$mask_wb, (ScatterNode (_.VT _.RC:$src),
10283 MaskRC:$mask, vectoraddr:$dst))]>,
10284 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10285 Sched<[WriteStore]>;
// Scatters with 64-bit data elements (PD/Q); mirrors avx512_gather_q_pd.
10288 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
10289 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10290 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
10291 vy512xmem, mscatterv8i32>, EVEX_V512, VEX_W;
10292 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
10293 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
10294 let Predicates = [HasVLX] in {
10295 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
10296 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
10297 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
10298 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
10299 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
10300 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
10301 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
10302 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Scatters with 32-bit data elements (PS/D); mirrors avx512_gather_d_ps,
// including the narrower data-vector infos on the qword-indexed variants.
10306 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
10307 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
10308 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
10309 mscatterv16i32>, EVEX_V512;
10310 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256mem,
10311 mscatterv8i64>, EVEX_V512;
10312 let Predicates = [HasVLX] in {
10313 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
10314 vy256xmem, mscatterv8i32>, EVEX_V256;
10315 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
10316 vy128xmem, mscatterv4i64>, EVEX_V256;
10317 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
10318 vx128xmem, mscatterv4i32>, EVEX_V128;
10319 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
10320 vx64xmem, mscatterv2i64, VK2WM>,
// Floating-point (VSCATTER*PS/PD) and integer (VPSCATTER*D/Q) scatter families.
10325 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
10326 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
10328 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
10329 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Masked gather/scatter prefetch (PFI). No pattern: emitted only via
// intrinsics/assembly. Marked mayLoad+mayStore since prefetch hints may
// touch memory in either direction.
10332 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
10333 RegisterClass KRC, X86MemOperand memop> {
10334 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
10335 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
10336 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
10337 EVEX, EVEX_K, Sched<[WriteLoad]>;
// VGATHERPF0/1 and VSCATTERPF0/1 hint instructions (T0/T1 cache hints),
// distinguished by the ModRM reg field (MRM1m/MRM2m/MRM5m/MRM6m).
10340 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
10341 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10343 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10344 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10346 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10347 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10349 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
10350 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10352 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
10353 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10355 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10356 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10358 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10359 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10361 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
10362 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10364 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
10365 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10367 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10368 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10370 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10371 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10373 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
10374 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
10376 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
10377 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
10379 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10380 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10382 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10383 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
10385 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
10386 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask->vector conversion (VPMOVM2*): sign-extend each mask bit into a full
// vector element. An anyext of the mask is also matched to the same
// instruction since any value is acceptable in the extended bits.
10388 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
10389 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
10390 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
10391 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
10392 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
10394 // Also need a pattern for anyextend.
10395 def : Pat<(Vec.VT (anyext Vec.KRC:$src)),
10396 (!cast<Instruction>(NAME#"rr") Vec.KRC:$src)>;
// Instantiate cvt_by_vec_width at 512-bit (base predicate) and, with VLX,
// at 256/128-bit widths.
10399 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
10400 string OpcodeStr, Predicate prd> {
10401 let Predicates = [prd] in
10402 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
10404 let Predicates = [prd, HasVLX] in {
10405 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
10406 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// VPMOVM2{B,W} need BWI; VPMOVM2{D,Q} need DQI. W/Q carry VEX_W.
10410 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
10411 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
10412 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
10413 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector->mask conversion (VPMOV*2M): matched as "0 > x" (sign-bit test),
// i.e. X86pcmpgtm of an all-zeros vector against the source.
10415 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
10416 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
10417 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
10418 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
10419 EVEX, Sched<[WriteMove]>;
10422 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the narrow source into the low part of a 512-bit register
// (INSERT_SUBREG into IMPLICIT_DEF) and run the Z-width instruction; the
// relevant low mask bits are then copied to the narrow mask class.
// NOTE(review): part of this multiclass header/pattern is elided in this
// extraction — confirm against the full file.
10423 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
10427 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
10428 (_.KVT (COPY_TO_REGCLASS
10429 (!cast<Instruction>(Name#"Zrr")
10430 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
10431 _.RC:$src, _.SubRegIdx)),
// Top-level driver: real 128/256-bit instructions with VLX; with NoVLX,
// fall back to the widen-to-512 lowering patterns instead.
10435 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
10436 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10437 let Predicates = [prd] in
10438 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
10441 let Predicates = [prd, HasVLX] in {
10442 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
10444 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
10447 let Predicates = [prd, NoVLX] in {
10448 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
10449 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
// VPMOV{B,W}2M need BWI; VPMOV{D,Q}2M need DQI. W/Q carry VEX_W.
10453 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
10454 avx512vl_i8_info, HasBWI>;
10455 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
10456 avx512vl_i16_info, HasBWI>, VEX_W;
10457 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
10458 avx512vl_i32_info, HasDQI>;
10459 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
10460 avx512vl_i64_info, HasDQI>, VEX_W;
10462 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
10463 // is available, but BWI is not. We can't handle this in lowering because
10464 // a target independent DAG combine likes to combine sext and trunc.
// Route through i32 elements: VPMOVM2D extends the mask, then VPMOVDB/DW
// truncates down to the byte/word vector.
10465 let Predicates = [HasDQI, NoBWI] in {
10466 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
10467 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10468 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
10469 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10471 def : Pat<(v16i8 (anyext (v16i1 VK16:$src))),
10472 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
10473 def : Pat<(v16i16 (anyext (v16i1 VK16:$src))),
10474 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// VLX variant of the same trick at 256-bit width for v8i1 -> v8i16.
10477 let Predicates = [HasDQI, NoBWI, HasVLX] in {
10478 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
10479 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10481 def : Pat<(v8i16 (anyext (v8i1 VK8:$src))),
10482 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
10485 //===----------------------------------------------------------------------===//
10486 // AVX-512 - COMPRESS and EXPAND
// COMPRESS at one vector width: maskable register form plus assembler-only
// memory forms (mr/mrk have no ISel pattern; compressing stores are matched
// separately via compress_by_vec_width_lowering).
10489 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
10490 string OpcodeStr, X86FoldableSchedWrite sched> {
10491 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
10492 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10493 (_.VT (X86compress _.RC:$src1))>, AVX5128IBase,
10496 let mayStore = 1, hasSideEffects = 0 in
10497 def mr : AVX5128I<opc, MRMDestMem, (outs),
10498 (ins _.MemOp:$dst, _.RC:$src),
10499 OpcodeStr # "\t{$src, $dst|$dst, $src}",
10500 []>, EVEX_CD8<_.EltSize, CD8VT1>,
10501 Sched<[sched.Folded]>;
10503 def mrk : AVX5128I<opc, MRMDestMem, (outs),
10504 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
10505 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
10507 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
10508 Sched<[sched.Folded]>;
// Match a masked compressing store to the masked memory form (mrk).
10511 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10512 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
10513 (!cast<Instruction>(Name#_.ZSuffix##mrk)
10514 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
// Instantiate COMPRESS at 512-bit (base predicate) and 256/128-bit (with
// VLX), pairing each width with its compressing-store lowering patterns.
10517 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
10518 X86FoldableSchedWrite sched,
10519 AVX512VLVectorVTInfo VTInfo,
10520 Predicate Pred = HasAVX512> {
10521 let Predicates = [Pred] in
10522 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
10523 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10525 let Predicates = [Pred, HasVLX] in {
10526 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
10527 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10528 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
10529 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10533 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10534 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10535 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10536 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10537 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10538 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10539 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10540 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10541 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// EXPAND at one vector width: maskable register and load-folding forms.
10544 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10545 string OpcodeStr, X86FoldableSchedWrite sched> {
10546 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10547 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10548 (_.VT (X86expand _.RC:$src1))>, AVX5128IBase,
10551 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10552 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10553 (_.VT (X86expand (_.VT (bitconvert
10554 (_.LdFrag addr:$src1)))))>,
10555 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10556 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Match masked expanding loads: undef/zero passthru selects the zeroing
// memory form (rmkz); a register passthru selects the merging form (rmk).
10559 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10561 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10562 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10563 _.KRCWM:$mask, addr:$src)>;
10565 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10566 (!cast<Instruction>(Name#_.ZSuffix##rmkz)
10567 _.KRCWM:$mask, addr:$src)>;
10569 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10570 (_.VT _.RC:$src0))),
10571 (!cast<Instruction>(Name#_.ZSuffix##rmk)
10572 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
// Instantiate EXPAND at 512-bit (base predicate) and 256/128-bit (with VLX),
// pairing each width with its expanding-load lowering patterns.
10575 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10576 X86FoldableSchedWrite sched,
10577 AVX512VLVectorVTInfo VTInfo,
10578 Predicate Pred = HasAVX512> {
10579 let Predicates = [Pred] in
10580 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10581 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10583 let Predicates = [Pred, HasVLX] in {
10584 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10585 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10586 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10587 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10591 // FIXME: Is there a better scheduler class for VPEXPAND?
10592 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10593 avx512vl_i32_info>, EVEX;
10594 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10595 avx512vl_i64_info>, EVEX, VEX_W;
10596 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10597 avx512vl_f32_info>, EVEX;
10598 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10599 avx512vl_f64_info>, EVEX, VEX_W;
10601 //handle instruction reg_vec1 = op(reg_vec,imm)
10603 // op(broadcast(eltVt),imm)
10604 //all instruction created with FROUND_CURRENT
// Unary FP operation with an 8-bit immediate: register (rri), full-vector
// memory (rmi), and element-broadcast memory (rmbi) forms.
10605 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10606 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10607 let ExeDomain = _.ExeDomain in {
10608 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10609 (ins _.RC:$src1, i32u8imm:$src2),
10610 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10611 (OpNode (_.VT _.RC:$src1),
10612 (i32 imm:$src2))>, Sched<[sched]>;
10613 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10614 (ins _.MemOp:$src1, i32u8imm:$src2),
10615 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
10616 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10618 Sched<[sched.Folded, sched.ReadAfterFold]>;
10619 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10620 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10621 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
10622 "${src1}"##_.BroadcastStr##", $src2",
10623 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
10624 (i32 imm:$src2))>, EVEX_B,
10625 Sched<[sched.Folded, sched.ReadAfterFold]>;
10629 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Unary FP + immediate with suppress-all-exceptions ({sae}) semantics:
// register-only form carrying FROUND_NO_EXC.
10630 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10631 SDNode OpNode, X86FoldableSchedWrite sched,
10632 X86VectorVTInfo _> {
10633 let ExeDomain = _.ExeDomain in
10634 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10635 (ins _.RC:$src1, i32u8imm:$src2),
10636 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
10637 "$src1, {sae}, $src2",
10638 (OpNode (_.VT _.RC:$src1),
10640 (i32 FROUND_NO_EXC))>,
10641 EVEX_B, Sched<[sched]>;
// Driver: 512-bit gets both the normal and the {sae} forms; 128/256-bit
// (with VLX) get only the normal forms.
10644 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10645 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10646 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
10647 let Predicates = [prd] in {
10648 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM,
10650 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
10651 sched.ZMM, _.info512>, EVEX_V512;
10653 let Predicates = [prd, HasVLX] in {
10654 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM,
10655 _.info128>, EVEX_V128;
10656 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM,
10657 _.info256>, EVEX_V256;
10661 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10662 // op(reg_vec2,mem_vec,imm)
10663 // op(reg_vec2,broadcast(eltVt),imm)
10664 //all instruction created with FROUND_CURRENT
// Binary FP operation with an 8-bit immediate: register (rri), full-vector
// memory (rmi), and element-broadcast memory (rmbi) forms.
10665 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10666 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10667 let ExeDomain = _.ExeDomain in {
10668 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10669 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10670 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10671 (OpNode (_.VT _.RC:$src1),
10675 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10676 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10677 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10678 (OpNode (_.VT _.RC:$src1),
10679 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10681 Sched<[sched.Folded, sched.ReadAfterFold]>;
10682 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10683 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10684 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10685 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10686 (OpNode (_.VT _.RC:$src1),
10687 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10688 (i32 imm:$src3))>, EVEX_B,
10689 Sched<[sched.Folded, sched.ReadAfterFold]>;
10693 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10694 // op(reg_vec2,mem_vec,imm)
// Three-operand form with u8 immediate where source and destination vector
// types may differ (DestInfo vs SrcInfo): register and memory forms.
10695 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10696 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10697 X86VectorVTInfo SrcInfo>{
10698 let ExeDomain = DestInfo.ExeDomain in {
10699 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10700 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10701 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10702 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10703 (SrcInfo.VT SrcInfo.RC:$src2),
10706 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10707 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10708 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10709 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10710 (SrcInfo.VT (bitconvert
10711 (SrcInfo.LdFrag addr:$src2))),
10713 Sched<[sched.Folded, sched.ReadAfterFold]>;
10717 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10718 // op(reg_vec2,mem_vec,imm)
10719 // op(reg_vec2,broadcast(eltVt),imm)
// Same as avx512_3Op_rm_imm8 (src type == dst type) plus an rmbi
// broadcast-from-memory form (EVEX.B).
10720 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10721 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10722 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10724 let ExeDomain = _.ExeDomain in
10725 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10726 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10727 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
10728 "$src1, ${src2}"##_.BroadcastStr##", $src3",
10729 (OpNode (_.VT _.RC:$src1),
10730 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
10731 (i8 imm:$src3))>, EVEX_B,
10732 Sched<[sched.Folded, sched.ReadAfterFold]>;
10735 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10736 // op(reg_vec2,mem_scalar,imm)
// Scalar FP op with i32u8 immediate:
//   rri - reg, reg, imm;  rmi - reg, scalar-mem, imm.
// Uses AVX512_maskable_scalar (write-mask applies to element 0 only).
10737 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10738 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10739 let ExeDomain = _.ExeDomain in {
10740 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10741 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10742 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10743 (OpNode (_.VT _.RC:$src1),
// Memory form: the scalar load is inserted into a vector via scalar_to_vector.
10747 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10748 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10749 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10750 (OpNode (_.VT _.RC:$src1),
10751 (_.VT (scalar_to_vector
10752 (_.ScalarLdFrag addr:$src2))),
10754 Sched<[sched.Folded, sched.ReadAfterFold]>;
10758 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Packed {sae} (suppress-all-exceptions) form: rrib, register-only with EVEX.B,
// rounding operand fixed to FROUND_NO_EXC.
10759 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10760 SDNode OpNode, X86FoldableSchedWrite sched,
10761 X86VectorVTInfo _> {
10762 let ExeDomain = _.ExeDomain in
10763 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10764 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10765 OpcodeStr, "$src3, {sae}, $src2, $src1",
10766 "$src1, $src2, {sae}, $src3",
10767 (OpNode (_.VT _.RC:$src1),
10770 (i32 FROUND_NO_EXC))>,
10771 EVEX_B, Sched<[sched]>;
10774 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar {sae} form, analogous to avx512_fp_sae_packed_imm but using
// AVX512_maskable_scalar. NAME# prefix keeps the instruction name stable
// when concatenated with the outer defm.
10775 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10776 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10777 let ExeDomain = _.ExeDomain in
10778 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10779 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10780 OpcodeStr, "$src3, {sae}, $src2, $src1",
10781 "$src1, $src2, {sae}, $src3",
10782 (OpNode (_.VT _.RC:$src1),
10785 (i32 FROUND_NO_EXC))>,
10786 EVEX_B, Sched<[sched]>;
// Instantiates packed-imm + sae forms at 512-bit (gated on prd), and plain
// packed-imm at 128/256-bit (gated on prd + HasVLX). EVEX_Vxxx suffix lines
// appear to be missing from this excerpt.
10789 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10790 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10791 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
10792 let Predicates = [prd] in {
10793 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10794 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched.ZMM, _.info512>,
10798 let Predicates = [prd, HasVLX] in {
10799 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10801 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Instantiates avx512_3Op_rm_imm8 at all three vector widths. Default
// predicate is HasBWI (byte/word instructions); VL forms add HasVLX.
10806 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10807 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10808 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10809 let Predicates = [Pred] in {
10810 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10811 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10813 let Predicates = [Pred, HasVLX] in {
10814 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10815 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10816 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10817 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Instantiates avx512_3Op_imm8 (same src/dst type, with broadcast form) at
// 512-bit plus 128/256-bit VL variants; default predicate HasAVX512.
10821 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10822 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10823 Predicate Pred = HasAVX512> {
10824 let Predicates = [Pred] in {
10825 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10828 let Predicates = [Pred, HasVLX] in {
10829 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10831 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Scalar counterpart: one Z defm combining the current-rounding and {sae}
// scalar-imm multiclasses (scalar ops always use sched.XMM).
10836 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10837 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10838 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd> {
10839 let Predicates = [prd] in {
10840 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10841 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched.XMM, _>;
// Expands a unary packed-imm op into PS (f32, CD8 scale 32) and PD
// (f64, CD8 scale 64, VEX.W) flavors, each with its own opcode.
10845 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10846 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10847 SDNode OpNodeRnd, X86SchedWriteWidths sched, Predicate prd>{
10848 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10849 opcPs, OpNode, OpNodeRnd, sched, prd>,
10850 EVEX_CD8<32, CD8VF>;
10851 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10852 opcPd, OpNode, OpNodeRnd, sched, prd>,
10853 EVEX_CD8<64, CD8VF>, VEX_W;
// Concrete instruction instantiations.
// Unary packed + imm: VREDUCE (DQI), VRNDSCALE, VGETMANT.
10856 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10857 X86VReduce, X86VReduceRnd, SchedWriteFRnd, HasDQI>,
10858 AVX512AIi8Base, EVEX;
10859 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10860 X86VRndScale, X86VRndScaleRnd, SchedWriteFRnd, HasAVX512>,
10861 AVX512AIi8Base, EVEX;
10862 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10863 X86VGetMant, X86VGetMantRnd, SchedWriteFRnd, HasAVX512>,
10864 AVX512AIi8Base, EVEX;
// Binary packed + imm: VRANGEPD/PS (DQI only).
10866 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10867 0x50, X86VRange, X86VRangeRnd,
10868 SchedWriteFAdd, HasDQI>,
10869 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10870 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10871 0x50, X86VRange, X86VRangeRnd,
10872 SchedWriteFAdd, HasDQI>,
10873 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar variants (VEX_LIG, single-element CD8 tuple CD8VT1).
10875 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10876 f64x_info, 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
10877 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10878 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10879 0x51, X86Ranges, X86RangesRnd, SchedWriteFAdd, HasDQI>,
10880 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10882 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10883 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
10884 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10885 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10886 0x57, X86Reduces, X86ReducesRnd, SchedWriteFRnd, HasDQI>,
10887 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10889 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10890 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
10891 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10892 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10893 0x27, X86GetMants, X86GetMantsRnd, SchedWriteFRnd, HasAVX512>,
10894 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Lower generic FP rounding nodes to VRNDSCALE with the matching rounding
// immediate (per Intel SDM VRNDSCALE imm encoding):
//   ffloor -> 0x9, fnearbyint -> 0xC, fceil -> 0xA, frint -> 0x4,
//   ftrunc -> 0xB.
// Patterns are provided for every operand/masking combination:
// reg, merge-masked reg, zero-masked reg, load, masked loads, broadcast
// load, and masked broadcast loads. Some vselect "else" operands (e.g. the
// zero vector for the kz forms) are on lines missing from this excerpt.
10897 multiclass AVX512_rndscale_lowering<X86VectorVTInfo _, string Suffix> {
// Unmasked register forms.
10899 def : Pat<(_.VT (ffloor _.RC:$src)),
10900 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10901 _.RC:$src, (i32 0x9))>;
10902 def : Pat<(_.VT (fnearbyint _.RC:$src)),
10903 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10904 _.RC:$src, (i32 0xC))>;
10905 def : Pat<(_.VT (fceil _.RC:$src)),
10906 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10907 _.RC:$src, (i32 0xA))>;
10908 def : Pat<(_.VT (frint _.RC:$src)),
10909 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10910 _.RC:$src, (i32 0x4))>;
10911 def : Pat<(_.VT (ftrunc _.RC:$src)),
10912 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rri")
10913 _.RC:$src, (i32 0xB))>;
// Merge-masking register forms (vselect of result over $dst -> "rrik").
10916 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src), _.RC:$dst)),
10917 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10918 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10919 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src), _.RC:$dst)),
10920 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10921 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10922 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src), _.RC:$dst)),
10923 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10924 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10925 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src), _.RC:$dst)),
10926 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10927 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10928 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src), _.RC:$dst)),
10929 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrik")
10930 _.RC:$dst, _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
// Zero-masking register forms ("rrikz").
10933 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor _.RC:$src),
10935 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10936 _.KRCWM:$mask, _.RC:$src, (i32 0x9))>;
10937 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint _.RC:$src),
10939 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10940 _.KRCWM:$mask, _.RC:$src, (i32 0xC))>;
10941 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil _.RC:$src),
10943 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10944 _.KRCWM:$mask, _.RC:$src, (i32 0xA))>;
10945 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint _.RC:$src),
10947 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10948 _.KRCWM:$mask, _.RC:$src, (i32 0x4))>;
10949 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc _.RC:$src),
10951 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rrikz")
10952 _.KRCWM:$mask, _.RC:$src, (i32 0xB))>;
// Unmasked load forms ("rmi").
10955 def : Pat<(_.VT (ffloor (_.LdFrag addr:$src))),
10956 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10957 addr:$src, (i32 0x9))>;
10958 def : Pat<(_.VT (fnearbyint (_.LdFrag addr:$src))),
10959 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10960 addr:$src, (i32 0xC))>;
10961 def : Pat<(_.VT (fceil (_.LdFrag addr:$src))),
10962 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10963 addr:$src, (i32 0xA))>;
10964 def : Pat<(_.VT (frint (_.LdFrag addr:$src))),
10965 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10966 addr:$src, (i32 0x4))>;
10967 def : Pat<(_.VT (ftrunc (_.LdFrag addr:$src))),
10968 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmi")
10969 addr:$src, (i32 0xB))>;
10971 // Merge-masking + load
10972 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10974 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10975 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10976 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
10978 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10979 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
10980 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
10982 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10983 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
10984 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
10986 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10987 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
10988 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
10990 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmik")
10991 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
10993 // Zero-masking + load
10994 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ffloor (_.LdFrag addr:$src)),
10996 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
10997 _.KRCWM:$mask, addr:$src, (i32 0x9))>;
10998 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fnearbyint (_.LdFrag addr:$src)),
11000 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
11001 _.KRCWM:$mask, addr:$src, (i32 0xC))>;
11002 def : Pat<(_.VT (vselect _.KRCWM:$mask, (fceil (_.LdFrag addr:$src)),
11004 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
11005 _.KRCWM:$mask, addr:$src, (i32 0xA))>;
11006 def : Pat<(_.VT (vselect _.KRCWM:$mask, (frint (_.LdFrag addr:$src)),
11008 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
11009 _.KRCWM:$mask, addr:$src, (i32 0x4))>;
11010 def : Pat<(_.VT (vselect _.KRCWM:$mask, (ftrunc (_.LdFrag addr:$src)),
11012 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmikz")
11013 _.KRCWM:$mask, addr:$src, (i32 0xB))>;
// Unmasked broadcast-load forms ("rmbi").
11016 def : Pat<(_.VT (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
11017 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
11018 addr:$src, (i32 0x9))>;
11019 def : Pat<(_.VT (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
11020 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
11021 addr:$src, (i32 0xC))>;
11022 def : Pat<(_.VT (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
11023 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
11024 addr:$src, (i32 0xA))>;
11025 def : Pat<(_.VT (frint (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
11026 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
11027 addr:$src, (i32 0x4))>;
11028 def : Pat<(_.VT (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src)))),
11029 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbi")
11030 addr:$src, (i32 0xB))>;
11032 // Merge-masking + broadcast load
11033 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11034 (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11036 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
11037 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x9))>;
11038 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11039 (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11041 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
11042 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xC))>;
11043 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11044 (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11046 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
11047 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xA))>;
11048 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11049 (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11051 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
11052 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0x4))>;
11053 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11054 (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11056 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbik")
11057 _.RC:$dst, _.KRCWM:$mask, addr:$src, (i32 0xB))>;
11059 // Zero-masking + broadcast load
11060 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11061 (ffloor (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11063 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
11064 _.KRCWM:$mask, addr:$src, (i32 0x9))>;
11065 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11066 (fnearbyint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11068 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
11069 _.KRCWM:$mask, addr:$src, (i32 0xC))>;
11070 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11071 (fceil (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11073 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
11074 _.KRCWM:$mask, addr:$src, (i32 0xA))>;
11075 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11076 (frint (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11078 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
11079 _.KRCWM:$mask, addr:$src, (i32 0x4))>;
11080 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11081 (ftrunc (X86VBroadcast (_.ScalarLdFrag addr:$src))),
11083 (!cast<Instruction>("VRNDSCALE"#Suffix#_.ZSuffix#"rmbikz")
11084 _.KRCWM:$mask, addr:$src, (i32 0xB))>;
// Instantiate the rounding lowerings: 512-bit under HasAVX512, 128/256-bit
// under HasVLX, for both f32 (PS) and f64 (PD) element types.
11087 let Predicates = [HasAVX512] in {
11088 defm : AVX512_rndscale_lowering<v16f32_info, "PS">;
11089 defm : AVX512_rndscale_lowering<v8f64_info, "PD">;
11092 let Predicates = [HasVLX] in {
11093 defm : AVX512_rndscale_lowering<v8f32x_info, "PS">;
11094 defm : AVX512_rndscale_lowering<v4f64x_info, "PD">;
11095 defm : AVX512_rndscale_lowering<v4f32x_info, "PS">;
11096 defm : AVX512_rndscale_lowering<v2f64x_info, "PD">;
// 128-bit-lane shuffle (X86Shuf128): rri/rmi/rmbi forms. The shuffle is
// matched at the CastInfo type and bitcast back to the instruction type _.
// EVEX2VEXOverride names the VEX instruction (e.g. VPERM2F128) the EVEX->VEX
// pass may compress rr/rm forms to; the broadcast form has no VEX equivalent.
11099 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
11100 X86FoldableSchedWrite sched,
11102 X86VectorVTInfo CastInfo,
11103 string EVEX2VEXOvrd> {
11104 let ExeDomain = _.ExeDomain in {
11105 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11106 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11107 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11109 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
11110 (i8 imm:$src3)))))>,
11111 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
11112 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11113 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11114 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11117 (CastInfo.VT (X86Shuf128 _.RC:$src1,
11118 (CastInfo.LdFrag addr:$src2),
11119 (i8 imm:$src3)))))>,
11120 Sched<[sched.Folded, sched.ReadAfterFold]>,
11121 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Broadcast form (EVEX.B); note: no EVEX2VEXOverride here.
11122 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11123 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11124 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
11125 "$src1, ${src2}"##_.BroadcastStr##", $src3",
11129 (X86Shuf128 _.RC:$src1,
11130 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
11131 (i8 imm:$src3)))))>, EVEX_B,
11132 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width instantiation for the 128-bit-lane shuffles. The 512-bit form passes
// an empty EVEX2VEXOverride (no VEX equivalent exists at 512 bits); only the
// 256-bit form can be compressed to VPERM2F128/VPERM2I128.
11136 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
11137 AVX512VLVectorVTInfo _,
11138 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
11139 string EVEX2VEXOvrd>{
11140 let Predicates = [HasAVX512] in
11141 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11142 _.info512, CastInfo.info512, "">, EVEX_V512;
11144 let Predicates = [HasAVX512, HasVLX] in
11145 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
11146 _.info256, CastInfo.info256,
11147 EVEX2VEXOvrd>, EVEX_V256;
// VSHUFF32X4/VSHUFF64X2 (FP, opcode 0x23) and VSHUFI32X4/VSHUFI64X2
// (integer, opcode 0x43). The 32-bit-element F variant still casts through
// f64 info; the 32-bit I variant through i64 info (CastInfo argument).
11150 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
11151 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11152 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
11153 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
11154 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
11155 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
11156 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
11157 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Fallback lowering of 128-bit subvector broadcast to a VSHUF*X* of the
// source inserted into both register operands. The immediate operand of each
// instruction result is on lines missing from this excerpt.
11159 let Predicates = [HasAVX512] in {
11160 // Provide fallback in case the load node that is used in the broadcast
11161 // patterns above is used by additional users, which prevents the pattern
11163 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
11164 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11165 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11167 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
11168 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11169 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11172 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
11173 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11174 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11176 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
11177 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11178 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// i16/i8 vectors reuse the 32x4 integer shuffle (no 16/8-bit lane shuffle).
11181 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
11182 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11183 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11186 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
11187 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11188 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGND/Q (X86VAlign) with u8 immediate: rri/rmi/rmbi forms. rr/rm forms
// can be EVEX->VEX compressed to VPALIGNR; the broadcast form cannot.
11192 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
11193 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11194 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
11195 // instantiation of this class.
11196 let ExeDomain = _.ExeDomain in {
11197 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11198 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
11199 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11200 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
11201 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
11202 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11203 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
11204 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
11205 (_.VT (X86VAlign _.RC:$src1,
11206 (bitconvert (_.LdFrag addr:$src2)),
11208 Sched<[sched.Folded, sched.ReadAfterFold]>,
11209 EVEX2VEXOverride<"VPALIGNRrmi">;
// Broadcast form (EVEX.B) — no VEX counterpart, so no override.
11211 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11212 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11213 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
11214 "$src1, ${src2}"##_.BroadcastStr##", $src3",
11215 (X86VAlign _.RC:$src1,
11216 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
11217 (i8 imm:$src3))>, EVEX_B,
11218 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width instantiation for VALIGN; all widths share opcode 0x03. The 256-bit
// variant unsets EVEX2VEXOverride (see comment below).
11222 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
11223 AVX512VLVectorVTInfo _> {
11224 let Predicates = [HasAVX512] in {
11225 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
11226 AVX512AIi8Base, EVEX_4V, EVEX_V512;
11228 let Predicates = [HasAVX512, HasVLX] in {
11229 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
11230 AVX512AIi8Base, EVEX_4V, EVEX_V128;
11231 // We can't really override the 256-bit version so change it back to unset.
11232 let EVEX2VEXOverride = ? in
11233 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
11234 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// VALIGND (dword), VALIGNQ (qword, trailing modifiers on a missing line),
// and VPALIGNR (byte-granular, HasBWI via avx512_common_3Op_rm_imm8 default).
11238 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
11239 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11240 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
11241 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
11244 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
11245 SchedWriteShuffle, avx512vl_i8_info,
11246 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
11248 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Immediate rescaling when retargeting an element size: the shift count is
// multiplied by (src elt size / dst elt size) in bytes:
//   valignq imm -> valignd imm: x2 (64->32-bit elements)
//   valignq imm -> vpalignr imm: x8 (64-bit elements -> bytes)
//   valignd imm -> vpalignr imm: x4 (32-bit elements -> bytes)
11250 def ValignqImm32XForm : SDNodeXForm<imm, [{
11251 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
11253 def ValignqImm8XForm : SDNodeXForm<imm, [{
11254 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
11256 def ValigndImm8XForm : SDNodeXForm<imm, [{
11257 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrites a masked align on the From type as the corresponding To-typed
// instruction, rescaling the immediate with ImmXForm. Covers merge-masked
// (rrik/rmik) and zero-masked (rrikz/rmikz) register and load forms; the
// bitcast/zero "else" operands of the vselects are on lines missing here.
11260 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
11261 X86VectorVTInfo From, X86VectorVTInfo To,
11262 SDNodeXForm ImmXForm> {
11263 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11265 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11268 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
11269 To.RC:$src1, To.RC:$src2,
11270 (ImmXForm imm:$src3))>;
11272 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11274 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
11277 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
11278 To.RC:$src1, To.RC:$src2,
11279 (ImmXForm imm:$src3))>;
11281 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11283 (From.VT (OpNode From.RC:$src1,
11284 (From.LdFrag addr:$src2),
11287 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
11288 To.RC:$src1, addr:$src2,
11289 (ImmXForm imm:$src3))>;
11291 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11293 (From.VT (OpNode From.RC:$src1,
11294 (From.LdFrag addr:$src2),
11297 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
11298 To.RC:$src1, addr:$src2,
11299 (ImmXForm imm:$src3))>;
// Extends avx512_vpalign_mask_lowering with broadcast-load forms: unmasked
// (rmbi), merge-masked (rmbik), and zero-masked (rmbikz). The broadcast is
// expressed at the To element type and bitconverted to From.
11302 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
11303 X86VectorVTInfo From,
11304 X86VectorVTInfo To,
11305 SDNodeXForm ImmXForm> :
11306 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
11307 def : Pat<(From.VT (OpNode From.RC:$src1,
11308 (bitconvert (To.VT (X86VBroadcast
11309 (To.ScalarLdFrag addr:$src2)))),
11311 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
11312 (ImmXForm imm:$src3))>;
11314 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11316 (From.VT (OpNode From.RC:$src1,
11318 (To.VT (X86VBroadcast
11319 (To.ScalarLdFrag addr:$src2)))),
11322 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
11323 To.RC:$src1, addr:$src2,
11324 (ImmXForm imm:$src3))>;
11326 def : Pat<(To.VT (vselect To.KRCWM:$mask,
11328 (From.VT (OpNode From.RC:$src1,
11330 (To.VT (X86VBroadcast
11331 (To.ScalarLdFrag addr:$src2)))),
11334 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
11335 To.RC:$src1, addr:$src2,
11336 (ImmXForm imm:$src3))>;
// Instantiate the masked-align rewrites: valignq -> valignd at every width,
// and (with VLX+BWI) 128-bit valignd/valignq -> vpalignr.
11339 let Predicates = [HasAVX512] in {
11340 // For 512-bit we lower to the widest element type we can. So we only need
11341 // to handle converting valignq to valignd.
11342 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
11343 v16i32_info, ValignqImm32XForm>;
11346 let Predicates = [HasVLX] in {
11347 // For 128-bit we lower to the widest element type we can. So we only need
11348 // to handle converting valignq to valignd.
11349 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
11350 v4i32x_info, ValignqImm32XForm>;
11351 // For 256-bit we lower to the widest element type we can. So we only need
11352 // to handle converting valignq to valignd.
11353 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
11354 v8i32x_info, ValignqImm32XForm>;
11357 let Predicates = [HasVLX, HasBWI] in {
11358 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
11359 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
11360 v16i8x_info, ValignqImm8XForm>;
11361 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
11362 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW: i8 sources -> i16 results; not EVEX->VEX convertible.
11365 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
11366 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
11367 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Maskable unary op: rr (register) and rm (full-vector load) forms.
11369 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
11370 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11371 let ExeDomain = _.ExeDomain in {
11372 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11373 (ins _.RC:$src1), OpcodeStr,
11375 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase,
11378 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11379 (ins _.MemOp:$src1), OpcodeStr,
11381 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
11382 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
11383 Sched<[sched.Folded]>;
// avx512_unary_rm plus an rmb broadcast-from-scalar-memory form (EVEX.B).
11387 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
11388 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
11389 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
11390 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11391 (ins _.ScalarMemOp:$src1), OpcodeStr,
11392 "${src1}"##_.BroadcastStr,
11393 "${src1}"##_.BroadcastStr,
11394 (_.VT (OpNode (X86VBroadcast
11395 (_.ScalarLdFrag addr:$src1))))>,
11396 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
11397 Sched<[sched.Folded]>;
// Width instantiation of avx512_unary_rm: Z under prd, Z128/Z256 under
// prd + HasVLX (EVEX_Vxxx modifiers are on lines missing from this excerpt).
11400 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11401 X86SchedWriteWidths sched,
11402 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
11403 let Predicates = [prd] in
11404 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11407 let Predicates = [prd, HasVLX] in {
11408 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11410 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same width instantiation as avx512_unary_rm_vl, but for the broadcast-
// capable avx512_unary_rmb.
11415 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
11416 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
11418 let Predicates = [prd] in
11419 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
11422 let Predicates = [prd, HasVLX] in {
11423 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
11425 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword/qword element pair: "<name>q" (VEX.W) and "<name>d" variants.
// Broadcast (rmb) forms exist for 32/64-bit elements, hence _rmb_vl here.
11430 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
11431 SDNode OpNode, X86SchedWriteWidths sched,
11433 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
11434 avx512vl_i64_info, prd>, VEX_W;
11435 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
11436 avx512vl_i32_info, prd>;
// Byte/word element pair: "<name>w" and "<name>b" variants (VEX.W-ignored).
// No broadcast forms for 8/16-bit elements, hence plain _rm_vl.
11439 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
11440 SDNode OpNode, X86SchedWriteWidths sched,
11442 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
11443 avx512vl_i16_info, prd>, VEX_WIG;
11444 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
11445 avx512vl_i8_info, prd>, VEX_WIG;
// All four element sizes (b/w/d/q) from one entry point; the predicate
// arguments of the two sub-multiclasses are on lines missing here.
11448 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
11449 bits<8> opc_d, bits<8> opc_q,
11450 string OpcodeStr, SDNode OpNode,
11451 X86SchedWriteWidths sched> {
11452 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
11454 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
11461 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
11462 let Predicates = [HasAVX512, NoVLX] in {
11463 def : Pat<(v4i64 (abs VR256X:$src)),
11466 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
11468 def : Pat<(v2i64 (abs VR128X:$src)),
11471 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
11475 // Use 512bit version to implement 128/256 bit.
// Generic NoVLX fallback: implement a 128/256-bit unary op by inserting the
// source into a 512-bit register, executing the Z (ZMM) instruction, and
// (per the SubRegIdx operands) extracting the original-width result.
11476 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
11477 AVX512VLVectorVTInfo _, Predicate prd> {
11478 let Predicates = [prd, NoVLX] in {
11479 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
11481 (!cast<Instruction>(InstrStr # "Zrr")
11482 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11483 _.info256.RC:$src1,
11484 _.info256.SubRegIdx)),
11485 _.info256.SubRegIdx)>;
11487 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
11489 (!cast<Instruction>(InstrStr # "Zrr")
11490 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
11491 _.info128.RC:$src1,
11492 _.info128.SubRegIdx)),
11493 _.info128.SubRegIdx)>;
// CDI instructions: VPLZCNT (count leading zeros) and VPCONFLICT,
// d/q element sizes only, plus their NoVLX 512-bit fallbacks.
11497 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
11498 SchedWriteVecIMul, HasCDI>;
11500 // FIXME: Is there a better scheduler class for VPCONFLICT?
11501 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
11502 SchedWriteVecALU, HasCDI>;
11504 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
11505 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
11506 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
11508 //===---------------------------------------------------------------------===//
11509 // Counts number of ones - VPOPCNTD and VPOPCNTQ
11510 //===---------------------------------------------------------------------===//
11512 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
11513 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
11514 SchedWriteVecALU, HasVPOPCNTDQ>;
11516 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
11517 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
11519 //===---------------------------------------------------------------------===//
11520 // Replicate Single FP - MOVSHDUP and MOVSLDUP
11521 //===---------------------------------------------------------------------===//
11523 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
11524 X86SchedWriteWidths sched> {
11525 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
11526 avx512vl_f32_info, HasAVX512>, XS;
11529 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
11530 SchedWriteFShuffle>;
11531 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
11532 SchedWriteFShuffle>;
11534 //===----------------------------------------------------------------------===//
11535 // AVX-512 - MOVDDUP
11536 //===----------------------------------------------------------------------===//
11538 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
11539 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
11540 let ExeDomain = _.ExeDomain in {
11541 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
11542 (ins _.RC:$src), OpcodeStr, "$src", "$src",
11543 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX,
11545 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
11546 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
11547 (_.VT (OpNode (_.VT (scalar_to_vector
11548 (_.ScalarLdFrag addr:$src)))))>,
11549 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
11550 Sched<[sched.Folded]>;
// Instantiate VMOVDDUP at all three vector widths (512/256/128-bit).
// NOTE(review): the OpNode parameter appears unused -- the 512/256-bit
// forms hardcode X86Movddup and the 128-bit form is built with
// X86VBroadcast instead (its memory form duplicates one scalar load, so
// it behaves as a broadcast) -- confirm whether OpNode can be dropped.
11554 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
11555 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
11556 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
11557 VTInfo.info512>, EVEX_V512;
// The 256/128-bit EVEX forms additionally require AVX512VL.
11559 let Predicates = [HasAVX512, HasVLX] in {
11560 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
11561 VTInfo.info256>, EVEX_V256;
11562 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched.XMM,
11563 VTInfo.info128>, EVEX_V128;
11567 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
11568 X86SchedWriteWidths sched> {
11569 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
11570 avx512vl_f64_info>, XD, VEX_W;
11573 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
11575 let Predicates = [HasVLX] in {
11576 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
11577 (VMOVDDUPZ128rm addr:$src)>;
11578 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
11579 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11580 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
11581 (VMOVDDUPZ128rm addr:$src)>;
11582 def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
11583 (VMOVDDUPZ128rm addr:$src)>;
11585 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11586 (v2f64 VR128X:$src0)),
11587 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
11588 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11589 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
11590 (bitconvert (v4i32 immAllZerosV))),
11591 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
11593 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
11594 (v2f64 VR128X:$src0)),
11595 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
11596 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
11597 (bitconvert (v4i32 immAllZerosV))),
11598 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
11600 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
11601 (v2f64 VR128X:$src0)),
11602 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
11603 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
11604 (bitconvert (v4i32 immAllZerosV))),
11605 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
11608 //===----------------------------------------------------------------------===//
11609 // AVX-512 - Unpack Instructions
11610 //===----------------------------------------------------------------------===//
11612 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
11613 SchedWriteFShuffleSizes, 0, 1>;
11614 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
11615 SchedWriteFShuffleSizes>;
11617 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
11618 SchedWriteShuffle, HasBWI>;
11619 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
11620 SchedWriteShuffle, HasBWI>;
11621 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
11622 SchedWriteShuffle, HasBWI>;
11623 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
11624 SchedWriteShuffle, HasBWI>;
11626 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
11627 SchedWriteShuffle, HasAVX512>;
11628 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
11629 SchedWriteShuffle, HasAVX512>;
11630 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
11631 SchedWriteShuffle, HasAVX512>;
11632 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
11633 SchedWriteShuffle, HasAVX512>;
11635 //===----------------------------------------------------------------------===//
11636 // AVX-512 - Extract & Insert Integer Instructions
11637 //===----------------------------------------------------------------------===//
11639 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11640 X86VectorVTInfo _> {
11641 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
11642 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11643 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11644 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
11646 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
11649 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
11650 let Predicates = [HasBWI] in {
11651 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
11652 (ins _.RC:$src1, u8imm:$src2),
11653 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11654 [(set GR32orGR64:$dst,
11655 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
11656 EVEX, TAPD, Sched<[WriteVecExtract]>;
11658 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
11662 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
11663 let Predicates = [HasBWI] in {
11664 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
11665 (ins _.RC:$src1, u8imm:$src2),
11666 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11667 [(set GR32orGR64:$dst,
11668 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
11669 EVEX, PD, Sched<[WriteVecExtract]>;
11671 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
11672 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
11673 (ins _.RC:$src1, u8imm:$src2),
11674 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
11675 EVEX, TAPD, FoldGenData<NAME#rr>,
11676 Sched<[WriteVecExtract]>;
11678 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
11682 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
11683 RegisterClass GRC> {
11684 let Predicates = [HasDQI] in {
11685 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
11686 (ins _.RC:$src1, u8imm:$src2),
11687 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11689 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
11690 EVEX, TAPD, Sched<[WriteVecExtract]>;
11692 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
11693 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
11694 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
11695 [(store (extractelt (_.VT _.RC:$src1),
11696 imm:$src2),addr:$dst)]>,
11697 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
11698 Sched<[WriteVecExtractSt]>;
11702 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
11703 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
11704 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
11705 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
11707 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
11708 X86VectorVTInfo _, PatFrag LdFrag> {
11709 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
11710 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
11711 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11713 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
11714 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
11717 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
11718 X86VectorVTInfo _, PatFrag LdFrag> {
11719 let Predicates = [HasBWI] in {
11720 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11721 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
11722 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11724 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
11725 Sched<[WriteVecInsert]>;
11727 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
11731 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
11732 X86VectorVTInfo _, RegisterClass GRC> {
11733 let Predicates = [HasDQI] in {
11734 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
11735 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
11736 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
11738 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
11739 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
11741 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
11742 _.ScalarLdFrag>, TAPD;
11746 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11747 extloadi8>, TAPD, VEX_WIG;
11748 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11749 extloadi16>, PD, VEX_WIG;
11750 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11751 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11753 //===----------------------------------------------------------------------===//
11754 // VSHUFPS - VSHUFPD Operations
11755 //===----------------------------------------------------------------------===//
11757 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11758 AVX512VLVectorVTInfo VTInfo_FP>{
11759 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11760 SchedWriteFShuffle>,
11761 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11762 AVX512AIi8Base, EVEX_4V;
11765 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11766 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11768 //===----------------------------------------------------------------------===//
11769 // AVX-512 - Byte shift Left/Right
11770 //===----------------------------------------------------------------------===//
11772 // FIXME: The SSE/AVX names are PSLLDQri etc. - should we add the i here as well?
// Whole-register byte shift (VPSLLDQ/VPSRLDQ style): emits one
// register-register form and one folded-load form, with the shift
// amount taken from an 8-bit immediate.  MRMr/MRMm select the opcode
// extension encodings for the reg and mem forms respectively.
11773 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11774 Format MRMm, string OpcodeStr,
11775 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11776 def rr : AVX512<opc, MRMr,
11777 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11778 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11779 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
// Memory form: the load is bitconverted to the vector type before the
// shift node is applied, matching the pattern the DAG produces.
11781 def rm : AVX512<opc, MRMm,
11782 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11783 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11784 [(set _.RC:$dst,(_.VT (OpNode
11785 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11786 (i8 imm:$src2))))]>,
11787 Sched<[sched.Folded, sched.ReadAfterFold]>;
11790 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11791 Format MRMm, string OpcodeStr,
11792 X86SchedWriteWidths sched, Predicate prd>{
11793 let Predicates = [prd] in
11794 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11795 sched.ZMM, v64i8_info>, EVEX_V512;
11796 let Predicates = [prd, HasVLX] in {
11797 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11798 sched.YMM, v32i8x_info>, EVEX_V256;
11799 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11800 sched.XMM, v16i8x_info>, EVEX_V128;
11803 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11804 SchedWriteShuffle, HasBWI>,
11805 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11806 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11807 SchedWriteShuffle, HasBWI>,
11808 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// Packed sum-of-absolute-differences (VPSADBW).  The destination
// element type differs from the source element type (i64 result from
// i8 inputs), hence the separate _dst and _src VTInfo parameters.
11810 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11811 string OpcodeStr, X86FoldableSchedWrite sched,
11812 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11813 def rr : AVX512BI<opc, MRMSrcReg,
11814 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11815 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11816 [(set _dst.RC:$dst,(_dst.VT
11817 (OpNode (_src.VT _src.RC:$src1),
11818 (_src.VT _src.RC:$src2))))]>,
// Folded-load form: second source comes from memory, bitconverted to
// the source vector type.
11820 def rm : AVX512BI<opc, MRMSrcMem,
11821 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11822 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11823 [(set _dst.RC:$dst,(_dst.VT
11824 (OpNode (_src.VT _src.RC:$src1),
11825 (_src.VT (bitconvert
11826 (_src.LdFrag addr:$src2))))))]>,
11827 Sched<[sched.Folded, sched.ReadAfterFold]>;
11830 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11831 string OpcodeStr, X86SchedWriteWidths sched,
11833 let Predicates = [prd] in
11834 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11835 v8i64_info, v64i8_info>, EVEX_V512;
11836 let Predicates = [prd, HasVLX] in {
11837 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11838 v4i64x_info, v32i8x_info>, EVEX_V256;
11839 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11840 v2i64x_info, v16i8x_info>, EVEX_V128;
11844 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11845 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11847 // Transforms to swizzle an immediate to enable better matching when
11848 // memory operand isn't in the right place.
// The VPTERNLOG immediate is an 8-entry truth table indexed by
// (Op0 << 2) | (Op1 << 1) | Op2, so permuting the instruction's source
// operands is equivalent to permuting the immediate's bits: each operand
// swap exchanges the table entries whose indices differ in the
// corresponding bit weights (Op0=4, Op1=2, Op2=1).
11849 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
11850 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11851 uint8_t Imm = N->getZExtValue();
// Swapping weights 4 and 1 exchanges indices 001<->100 and 011<->110.
11852 // Swap bits 1/4 and 3/6.
11853 uint8_t NewImm = Imm & 0xa5;
11854 if (Imm & 0x02) NewImm |= 0x10;
11855 if (Imm & 0x10) NewImm |= 0x02;
11856 if (Imm & 0x08) NewImm |= 0x40;
11857 if (Imm & 0x40) NewImm |= 0x08;
11858 return getI8Imm(NewImm, SDLoc(N));
11860 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
// Operand indices are 0-based, matching the VPTERNLOG321 comment: the
// swizzle below exchanges the FIRST and SECOND sources (truth-table
// weights 4 and 2), not the second and third -- the original comment's
// "operand 1 and operand 2" was off by one.
11861 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11862 uint8_t Imm = N->getZExtValue();
// Swapping weights 4 and 2 exchanges indices 010<->100 and 011<->101.
11863 // Swap bits 2/4 and 3/5.
11864 uint8_t NewImm = Imm & 0xc3;
11865 if (Imm & 0x04) NewImm |= 0x10;
11866 if (Imm & 0x10) NewImm |= 0x04;
11867 if (Imm & 0x08) NewImm |= 0x20;
11868 if (Imm & 0x20) NewImm |= 0x08;
11869 return getI8Imm(NewImm, SDLoc(N));
11871 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
// 0-indexed: exchanges the second and third sources (truth-table
// weights 2 and 1).
11872 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11873 uint8_t Imm = N->getZExtValue();
// Swapping weights 2 and 1 exchanges indices 001<->010 and 101<->110.
11874 // Swap bits 1/2 and 5/6.
11875 uint8_t NewImm = Imm & 0x99;
11876 if (Imm & 0x02) NewImm |= 0x04;
11877 if (Imm & 0x04) NewImm |= 0x02;
11878 if (Imm & 0x20) NewImm |= 0x40;
11879 if (Imm & 0x40) NewImm |= 0x20;
11880 return getI8Imm(NewImm, SDLoc(N));
11882 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
// 0-indexed: the FIRST source rotates to the end (new order is old
// 1,2,0), i.e. each truth-table index is rotated left one bit -- the
// original comment's "operand 1" used a different indexing convention
// than the sibling transforms.
11883 // Convert a VPTERNLOG immediate by moving operand 0 to the end.
11884 uint8_t Imm = N->getZExtValue();
11885 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11886 uint8_t NewImm = Imm & 0x81;
11887 if (Imm & 0x02) NewImm |= 0x04;
11888 if (Imm & 0x04) NewImm |= 0x10;
11889 if (Imm & 0x08) NewImm |= 0x40;
11890 if (Imm & 0x10) NewImm |= 0x02;
11891 if (Imm & 0x20) NewImm |= 0x08;
11892 if (Imm & 0x40) NewImm |= 0x20;
11893 return getI8Imm(NewImm, SDLoc(N));
11895 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
// 0-indexed: the LAST source rotates to the front (new order is old
// 2,0,1), i.e. each truth-table index is rotated right one bit; this is
// the inverse of VPTERNLOG231_imm8.
11896 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
11897 uint8_t Imm = N->getZExtValue();
11898 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11899 uint8_t NewImm = Imm & 0x81;
11900 if (Imm & 0x02) NewImm |= 0x10;
11901 if (Imm & 0x04) NewImm |= 0x02;
11902 if (Imm & 0x08) NewImm |= 0x20;
11903 if (Imm & 0x10) NewImm |= 0x04;
11904 if (Imm & 0x20) NewImm |= 0x40;
11905 if (Imm & 0x40) NewImm |= 0x08;
11906 return getI8Imm(NewImm, SDLoc(N));
11909 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11910 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11912 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11913 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11914 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11915 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11916 (OpNode (_.VT _.RC:$src1),
11919 (i8 imm:$src4)), 1, 1>,
11920 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11921 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11922 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11923 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11924 (OpNode (_.VT _.RC:$src1),
11926 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11927 (i8 imm:$src4)), 1, 0>,
11928 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11929 Sched<[sched.Folded, sched.ReadAfterFold]>;
11930 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11931 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11932 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
11933 "$src2, ${src3}"##_.BroadcastStr##", $src4",
11934 (OpNode (_.VT _.RC:$src1),
11936 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
11937 (i8 imm:$src4)), 1, 0>, EVEX_B,
11938 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11939 Sched<[sched.Folded, sched.ReadAfterFold]>;
11940 }// Constraints = "$src1 = $dst"
11942 // Additional patterns for matching passthru operand in other positions.
11943 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11944 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11946 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11947 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11948 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11949 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
11951 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11952 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
11954 // Additional patterns for matching loads in other positions.
11955 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11956 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
11957 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11958 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11959 def : Pat<(_.VT (OpNode _.RC:$src1,
11960 (bitconvert (_.LdFrag addr:$src3)),
11961 _.RC:$src2, (i8 imm:$src4))),
11962 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11963 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11965 // Additional patterns for matching zero masking with loads in other
11967 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11968 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11969 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11971 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11972 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11973 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11974 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11975 _.RC:$src2, (i8 imm:$src4)),
11977 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11978 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11980 // Additional patterns for matching masked loads with different
11982 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11983 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11984 _.RC:$src2, (i8 imm:$src4)),
11986 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11987 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
11988 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11989 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11990 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
11992 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11993 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
11994 def : Pat<(_.VT (vselect _.KRCWM:$mask,
11995 (OpNode _.RC:$src2, _.RC:$src1,
11996 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
11998 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11999 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
12000 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12001 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
12002 _.RC:$src1, (i8 imm:$src4)),
12004 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
12005 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
12006 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12007 (OpNode (bitconvert (_.LdFrag addr:$src3)),
12008 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
12010 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
12011 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
12013 // Additional patterns for matching broadcasts in other positions.
12014 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12015 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
12016 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
12017 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
12018 def : Pat<(_.VT (OpNode _.RC:$src1,
12019 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12020 _.RC:$src2, (i8 imm:$src4))),
12021 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
12022 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
12024 // Additional patterns for matching zero masking with broadcasts in other
12026 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12027 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12028 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
12030 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12031 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12032 (VPTERNLOG321_imm8 imm:$src4))>;
12033 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12034 (OpNode _.RC:$src1,
12035 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12036 _.RC:$src2, (i8 imm:$src4)),
12038 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
12039 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
12040 (VPTERNLOG132_imm8 imm:$src4))>;
12042 // Additional patterns for matching masked broadcasts with different
12044 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12045 (OpNode _.RC:$src1,
12046 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12047 _.RC:$src2, (i8 imm:$src4)),
12049 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12050 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
12051 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12052 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12053 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
12055 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12056 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
12057 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12058 (OpNode _.RC:$src2, _.RC:$src1,
12059 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12060 (i8 imm:$src4)), _.RC:$src1)),
12061 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12062 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
12063 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12064 (OpNode _.RC:$src2,
12065 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12066 _.RC:$src1, (i8 imm:$src4)),
12068 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12069 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
12070 def : Pat<(_.VT (vselect _.KRCWM:$mask,
12071 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
12072 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
12074 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
12075 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
12078 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
12079 AVX512VLVectorVTInfo _> {
12080 let Predicates = [HasAVX512] in
12081 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
12082 _.info512, NAME>, EVEX_V512;
12083 let Predicates = [HasAVX512, HasVLX] in {
12084 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
12085 _.info128, NAME>, EVEX_V128;
12086 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
12087 _.info256, NAME>, EVEX_V256;
12091 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
12092 avx512vl_i32_info>;
12093 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
12094 avx512vl_i64_info>, VEX_W;
12096 // Patterns to implement vnot using vpternlog instead of creating all ones
12097 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
12098 // so that the result is only dependent on src0. But we use the same source
12099 // for all operands to prevent a false dependency.
12100 // TODO: We should maybe have a more generalized algorithm for folding to
12102 let Predicates = [HasAVX512] in {
12103 def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
12104 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12105 def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
12106 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12107 def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
12108 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12109 def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
12110 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
12113 let Predicates = [HasAVX512, NoVLX] in {
12114 def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
12117 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12118 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12119 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12120 (i8 15)), sub_xmm)>;
12121 def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
12124 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12125 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12126 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12127 (i8 15)), sub_xmm)>;
12128 def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
12131 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12132 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12133 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12134 (i8 15)), sub_xmm)>;
12135 def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
12138 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12139 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12140 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
12141 (i8 15)), sub_xmm)>;
12143 def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
12146 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12147 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12148 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12149 (i8 15)), sub_ymm)>;
12150 def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
12153 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12154 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12155 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12156 (i8 15)), sub_ymm)>;
12157 def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
12160 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12161 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12162 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12163 (i8 15)), sub_ymm)>;
12164 def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
12167 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12168 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12169 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
12170 (i8 15)), sub_ymm)>;
12173 let Predicates = [HasVLX] in {
12174 def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
12175 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12176 def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
12177 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12178 def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
12179 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12180 def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
12181 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
12183 def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
12184 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12185 def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
12186 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12187 def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
12188 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12189 def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
12190 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
12193 //===----------------------------------------------------------------------===//
12194 // AVX-512 - FixupImm
12195 //===----------------------------------------------------------------------===//
// VFIXUPIMM packed forms: reg/reg (rri), reg/mem (rmi) and reg/broadcast
// (rmbi).  $src1 is tied to $dst; TblVT is the integer table-vector type the
// instruction consults.  Restored the dropped (_.VT _.RC:$src2) /
// (i32 imm:$src4) operand lines and the multiclass closing brace, which were
// truncated in this listing.
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT _.RC:$src3),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                        (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
                        OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                        (OpNode (_.VT _.RC:$src1),
                                (_.VT _.RC:$src2),
                                (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
                                (i32 imm:$src4),
                                (i32 FROUND_CURRENT))>,
                        Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
                      "$src2, ${src3}"##_.BroadcastStr##", $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  } // Constraints = "$src1 = $dst"
}
// VFIXUPIMM register form with {sae} (suppress-all-exceptions) rounding
// control; FROUND_NO_EXC marks the SAE variant.  Restored the dropped
// (_.VT _.RC:$src2) / (i32 imm:$src4) operands and the closing braces.
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
                                      SDNode OpNode, X86FoldableSchedWrite sched,
                                      X86VectorVTInfo _, X86VectorVTInfo TblVT>{
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
    defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (TblVT.VT _.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched]>;
  }
}
// VFIXUPIMMSS/SD scalar forms: reg (rri), reg with {sae} (rrib) and memory
// (rmi).  _src3VT is the integer table type of the third source.  Restored
// the dropped (_.VT _.RC:$src2) / (i32 imm:$src4) operand lines and the
// closing braces lost in this listing.
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  X86VectorVTInfo _src3VT> {
  let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
      ExeDomain = _.ExeDomain in {
    defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>, Sched<[sched]>;
    defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
                      "$src2, $src3, {sae}, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT _src3VT.RC:$src3),
                              (i32 imm:$src4),
                              (i32 FROUND_NO_EXC))>,
                      EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
    defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
                      (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
                      OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
                      (OpNode (_.VT _.RC:$src1),
                              (_.VT _.RC:$src2),
                              (_src3VT.VT (scalar_to_vector
                                        (_src3VT.ScalarLdFrag addr:$src3))),
                              (i32 imm:$src4),
                              (i32 FROUND_CURRENT))>,
                      Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Instantiates the packed fixupimm forms at all three vector widths; the
// 512-bit form additionally gets the {sae} variant.  Restored the two
// closing braces truncated from this listing.
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
                                      AVX512VLVectorVTInfo _Vec,
                                      AVX512VLVectorVTInfo _Tbl> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                     _Vec.info512, _Tbl.info512>,
              avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched.ZMM,
                                         _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
              EVEX_4V, EVEX_V512;
  let Predicates = [HasAVX512, HasVLX] in {
  defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.XMM,
                                     _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
              EVEX_4V, EVEX_V128;
  defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched.YMM,
                                     _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
              EVEX_4V, EVEX_V256;
  }
}
// Instantiate VFIXUPIMM: scalar SS/SD (opcode 0x55) and packed PS/PD
// (opcode 0x54, all widths via avx512_fixupimm_packed_all).
12303 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
12304 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
12305 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
12306 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
12307 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
12308 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
12309 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
12310 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12311 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
12312 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
12314 // Patterns used to select SSE scalar fp arithmetic instructions from
12317 // (1) a scalar fp operation followed by a blend
12319 // The effect is that the backend no longer emits unnecessary vector
12320 // insert instructions immediately after SSE scalar fp instructions
12321 // like addss or mulss.
12323 // For example, given the following code:
12324 // __m128 foo(__m128 A, __m128 B) {
12329 // Previously we generated:
12330 // addss %xmm0, %xmm1
12331 // movss %xmm1, %xmm0
12333 // We now generate:
12334 // addss %xmm1, %xmm0
12336 // (2) a vector packed single/double fp operation followed by a vector insert
12338 // The effect is that the backend converts the packed fp instruction
12339 // followed by a vector insert into a single SSE scalar fp instruction.
12341 // For example, given the following code:
12342 // __m128 foo(__m128 A, __m128 B) {
12343 // __m128 C = A + B;
12344 // return (__m128) {C[0], A[1], A[2], A[3]};
12347 // Previously we generated:
12348 // addps %xmm0, %xmm1
12349 // movss %xmm1, %xmm0
12351 // We now generate:
12352 // addss %xmm1, %xmm0
12354 // TODO: Some canonicalization in lowering would simplify the number of
12355 // patterns we have to try to match.
// Selects the AVX-512 scalar-FP *_Int instructions for an extracted scalar
// op re-inserted via movss/movsd: unmasked, merge-masked (src0 passthru) and
// zero-masked forms.  Restored the pattern fragments and closing braces
// truncated from this listing.
multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode MoveNode,
                                          X86VectorVTInfo _, PatLeaf ZeroFP> {
  let Predicates = [HasAVX512] in {
    // extracted scalar math op with insert via movss
    def : Pat<(MoveNode
               (_.VT VR128X:$dst),
               (_.VT (scalar_to_vector
                      (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
                          _.FRC:$src)))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Int) _.VT:$dst,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;

    // extracted masked scalar math op with insert via movss
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2),
                            _.FRC:$src0))),
              (!cast<Instruction>("V"#OpcPrefix#Zrr_Intk)
               (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;

    // extracted masked scalar math op with insert via movss, zero-masked
    def : Pat<(MoveNode (_.VT VR128X:$src1),
               (scalar_to_vector
                (X86selects VK1WM:$mask,
                            (Op (_.EltVT
                                 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
                                _.FRC:$src2), (_.EltVT ZeroFP)))),
              (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
               VK1WM:$mask, _.VT:$src1,
               (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
  }
}
// Instantiate the scalar-FP math patterns for add/sub/mul/div, in both the
// single-precision (movss) and double-precision (movsd) forms.
12394 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12395 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12396 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12397 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12399 defm : AVX512_scalar_math_fp_patterns<fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12400 defm : AVX512_scalar_math_fp_patterns<fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12401 defm : AVX512_scalar_math_fp_patterns<fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12402 defm : AVX512_scalar_math_fp_patterns<fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Selects the *_Int form for a unary scalar op (e.g. sqrt) whose result is
// re-inserted into element 0 via movss/movsd.  Restored the two closing
// braces truncated from this listing.
multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
                                             SDNode Move, X86VectorVTInfo _> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src)>;
  }
}
// fsqrt on an extracted element + reinsert selects to VSQRTSS/VSQRTSD _Int.
12413 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12414 defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
// Like AVX512_scalar_unary_math_patterns but for instructions taking a fixed
// immediate (e.g. VRNDSCALE rounding mode ImmV).  Restored the dropped
// `bits<8> ImmV` parameter line, the (i32 ImmV) result operand and the
// closing braces.
multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
                                                 SDNode Move, X86VectorVTInfo _,
                                                 bits<8> ImmV> {
  let Predicates = [HasAVX512] in {
    def : Pat<(_.VT (Move _.VT:$dst,
                     (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
              (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
                                                        (i32 ImmV))>;
  }
}
// ffloor/fceil on an extracted element select to VRNDSCALESS/SD with the
// corresponding rounding immediate (0x01 = floor, 0x02 = ceil).
12427 defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
12428 v4f32x_info, 0x01>;
12429 defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
12430 v4f32x_info, 0x02>;
12431 defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
12432 v2f64x_info, 0x01>;
12433 defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
12434 v2f64x_info, 0x02>;
12436 //===----------------------------------------------------------------------===//
12437 // AES instructions
12438 //===----------------------------------------------------------------------===//
// EVEX-encoded VAES at 128/256 bits (VLX+VAES) and 512 bits (AVX512F+VAES),
// reusing the SSE AESI_binop_rm_int helper with the intrinsic looked up by
// name.  Restored the closing braces truncated from this listing.
multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
  let Predicates = [HasVLX, HasVAES] in {
    defm Z128 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix),
                                  loadv2i64, 0, VR128X, i128mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
    defm Z256 : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_256"),
                                  loadv4i64, 0, VR256X, i256mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
  }
  let Predicates = [HasAVX512, HasVAES] in
    defm Z    : AESI_binop_rm_int<Op, OpStr,
                                  !cast<Intrinsic>(IntPrefix##"_512"),
                                  loadv8i64, 0, VR512, i512mem>,
                  EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
}
// Instantiate the four VAES operations; opcodes 0xDC-0xDF match the legacy
// AESNI encodings.
12458 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12459 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12460 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12461 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12463 //===----------------------------------------------------------------------===//
12464 // PCLMUL instructions - Carry less multiplication
12465 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply: 512-bit under AVX512F+VPCLMULQDQ,
// 128/256-bit under VLX+VPCLMULQDQ.  Restored the closing brace of the VLX
// predicate block truncated from this listing.
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
                   EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;

let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
                      EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;

defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
                                int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
                                EVEX_CD8<64, CD8VF>, VEX_WIG;
}
// Assembler aliases (e.g. vpclmullqlqdq) for the EVEX-encoded forms.
12481 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12482 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12483 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12485 //===----------------------------------------------------------------------===//
12487 //===----------------------------------------------------------------------===//
// VBMI2 variable funnel-shift, register and memory forms; $src1 is tied to
// $dst.  Restored the dropped AVX512FMA3Base continuation on the memory form
// and the closing braces.
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
                              X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in {
    defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
              "$src3, $src2", "$src2, $src3",
              (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
              AVX512FMA3Base, Sched<[sched]>;
    defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
              (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
              "$src3, $src2", "$src2, $src3",
              (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                      (VTI.VT (VTI.LdFrag addr:$src3))))>,
              AVX512FMA3Base,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Extends VBMI2_shift_var_rm with the broadcast-memory (mb) form for the
// dword/qword element types.  Restored the missing closing brace.
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                               X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
         : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
  let Constraints = "$src1 = $dst",
      ExeDomain   = VTI.ExeDomain in
  defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
            (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
            "${src3}"##VTI.BroadcastStr##", $src2",
            "$src2, ${src3}"##VTI.BroadcastStr,
            (OpNode VTI.RC:$src1, VTI.RC:$src2,
             (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
            AVX512FMA3Base, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiates the non-broadcast variable-shift forms at all widths.
// Restored the dropped EVEX_V* continuation lines and closing braces.
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Instantiates the broadcast-capable variable-shift forms at all widths.
// Restored the dropped EVEX_V* continuation lines and closing braces.
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasVBMI2] in
  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
                EVEX_V512;
  let Predicates = [HasVBMI2, HasVLX] in {
    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
                EVEX_V256;
    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
                EVEX_V128;
  }
}
// Word forms have no broadcast (16-bit broadcast is unsupported), so W uses
// the rm-only common class while D/Q use the rmb one.  Restored the missing
// closing brace.
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched,
             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched,
             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched,
             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
// Immediate-count funnel shifts (W/D/Q element widths).  Restored the
// missing closing brace.
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
                           SDNode OpNode, X86SchedWriteWidths sched> {
  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched,
             avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
             VEX_W, EVEX_CD8<16, CD8VF>;
  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
             OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
             sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
// VBMI2 concat-and-shift: variable-count (VPSHLDV/VPSHRDV) and
// immediate-count (VPSHLD/VPSHRD) left/right funnel shifts.
12570 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12571 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12572 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12573 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// VBMI2 byte/word compress and expand.  Restored the dropped EVEX_CD8
// continuation lines on the two compress definitions.
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
                                         avx512vl_i8_info, HasVBMI2>, EVEX,
                                         EVEX_CD8<8, CD8VF>;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
                                          avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
                                          EVEX_CD8<16, CD8VF>;
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
                                      avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
                                      avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12588 //===----------------------------------------------------------------------===//
12590 //===----------------------------------------------------------------------===//
// VNNI dot-product accumulate, one vector width: register, memory and
// broadcast forms; the accumulator $src1 is tied to $dst.  Restored the
// multiclass closing brace truncated from this listing.
let Constraints = "$src1 = $dst" in
multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
                    X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1,
                                            VTI.RC:$src2, VTI.RC:$src3))>,
                                   EVEX_4V, T8PD, Sched<[sched]>;
  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
                                   "$src3, $src2", "$src2, $src3",
                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                            (VTI.VT (VTI.LdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
                                   Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
                                   "$src2, ${src3}"##VTI.BroadcastStr,
                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
                                    (VTI.VT (X86VBroadcast
                                             (VTI.ScalarLdFrag addr:$src3))))>,
                                   EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
                                   T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiates a VNNI operation at 512 (VNNI) and 128/256 (VNNI+VLX) bits.
// Restored the two closing braces truncated from this listing.
multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
                       X86SchedWriteWidths sched> {
  let Predicates = [HasVNNI] in
  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info>, EVEX_V512;
  let Predicates = [HasVNNI, HasVLX] in {
    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info>, EVEX_V256;
    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info>, EVEX_V128;
  }
}
12629 // FIXME: Is there a better scheduler class for VPDP?
// AVX512_VNNI multiply-accumulate: byte (VPDPBUSD[S]) and word (VPDPWSSD[S])
// dot products, with and without saturation.
12630 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul>;
12631 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul>;
12632 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul>;
12633 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul>;
12635 //===----------------------------------------------------------------------===//
12637 //===----------------------------------------------------------------------===//
12639 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
// BITALG per-byte / per-word population count; the unary-lowering defms add
// the patterns that select generic ctpop to these instructions.
12640 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12641 avx512vl_i8_info, HasBITALG>;
12642 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12643 avx512vl_i16_info, HasBITALG>, VEX_W;
12645 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12646 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// BITALG vpshufbitqmb: gathers bits selected by $src2 from $src1 into a mask
// register (KRC output).  Restored the dropped mnemonic-string lines, the
// register-form Sched continuation, and the closing brace.
multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.RC:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
                                Sched<[sched]>;
  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
                                "vpshufbitqmb",
                                "$src2, $src1", "$src1, $src2",
                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
                                (VTI.VT (VTI.LdFrag addr:$src2)))>,
                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
                                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiates vpshufbitqmb at 512 (BITALG) and 128/256 (BITALG+VLX) bits.
// Restored the two closing braces truncated from this listing.
multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
  let Predicates = [HasBITALG] in
  defm Z      : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
  let Predicates = [HasBITALG, HasVLX] in {
    defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
    defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
  }
}
12675 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
// Byte-granular instantiation; the destination is a k-mask register.
12676 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12678 //===----------------------------------------------------------------------===//
12680 //===----------------------------------------------------------------------===//
// EVEX-encoded GFNI multiply: 512-bit under GFNI+AVX512BW, 128/256 under
// GFNI+VLX+BWI.  Restored the dropped EVEX_V* continuation lines and the
// closing braces.
multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                   X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
                EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
                EVEX_V256;
    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
                EVEX_V128;
  }
}
// GF(2^8) polynomial multiply of packed bytes.  Restored the dropped
// scheduler-class continuation line.
defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
                                          SchedWriteVecALU>,
                                          EVEX_CD8<8, CD8VF>, T8PD;
// Adds the qword-broadcast immediate form (rmbi) on top of the reg/mem forms
// inherited from avx512_3Op_rm_imm8; BcstVTI supplies the 64-bit broadcast
// element type.  Restored the missing closing brace.
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
                                      X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
                                      X86VectorVTInfo BcstVTI>
           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
  let ExeDomain = VTI.ExeDomain in
  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
                (OpNode (VTI.VT VTI.RC:$src1),
                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
                 (i8 imm:$src3))>, EVEX_B,
                Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Instantiates a GFNI affine transform at all widths (512 under AVX512BW,
// 128/256 under VLX+BWI).  Restored the two closing braces truncated from
// this listing.
multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
                                     X86SchedWriteWidths sched> {
  let Predicates = [HasGFNI, HasAVX512, HasBWI] in
  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
                                           v64i8_info, v8i64_info>, EVEX_V512;
  let Predicates = [HasGFNI, HasVLX, HasBWI] in {
    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
                                           v32i8x_info, v4i64x_info>, EVEX_V256;
    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
                                           v16i8x_info, v2i64x_info>, EVEX_V128;
  }
}
// GFNI affine transforms: 0xCF with inversion, 0xCE without.
12727 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12728 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12729 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12730 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12731 X86GF2P8affineqb, SchedWriteVecIMul>,
12732 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12735 //===----------------------------------------------------------------------===//
12737 //===----------------------------------------------------------------------===//
// AVX512_4FMAPS (Knights Mill) 4-iteration FMA: memory-only, asm-pattern
// definitions (no ISel patterns; mayLoad/hasSideEffects set explicitly).
// Restored the closing brace of the `let` block truncated from this listing.
let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
    Constraints = "$src1 = $dst" in {
defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
                    (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                    "v4fmaddps", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                    Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
                     (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
                     "v4fnmaddps", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
                     Sched<[SchedWriteFMA.ZMM.Folded]>;

defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
                    (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                    "v4fmaddss", "$src3, $src2", "$src2, $src3",
                    []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                    Sched<[SchedWriteFMA.Scl.Folded]>;

defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
                     (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
                     "v4fnmaddss", "$src3, $src2", "$src2, $src3",
                     []>, EVEX_V128, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
                     Sched<[SchedWriteFMA.Scl.Folded]>;
}
12766 //===----------------------------------------------------------------------===//
12768 //===----------------------------------------------------------------------===//
12770 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12771 Constraints = "$src1 = $dst" in {
12772 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12773 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12774 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12775 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12776 Sched<[SchedWriteFMA.ZMM.Folded]>;
12778 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12779 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12780 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12781 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12782 Sched<[SchedWriteFMA.ZMM.Folded]>;