1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT). These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
23 RegisterClass RC = rc;
24 ValueType EltVT = eltvt;
25 int NumElts = numelts;
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
34 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
36 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
39 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
48 ValueType VT = !cast<ValueType>(VTName);
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
52 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
55 // "i" for integer types and "f" for floating-point types
56 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
58 // Size of RC in bits, e.g. 512 for VR512.
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64 // FP scalar memory operand for intrinsics - ssmem/sdmem.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
75 !if (!eq (Size, 512), "v8i64",
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
85 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
87 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
93 // The corresponding float type, e.g. v16f32 for v16i32
94 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
// Concrete X86VectorVTInfo instances for every vector type AVX-512 operates
// on, grouped by register width.

// 512-bit vector types (RC = VR512, i.e. ZMM registers).
139 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
141 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
143 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
// 256-bit vector types.
146 // "x" in v32i8x_info means RC = VR256X
147 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
151 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector types (RC = VR128X, i.e. XMM registers).
154 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
158 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// Scalar "vectors" (NumElts = 1). Note the integer scalars use GPR classes
// (GR32/GR64) while the FP scalars live in XMM registers.
161 // We map scalar types to the smallest (128-bit) vector type
162 // with the appropriate element type. This allows using the same masking logic.
163 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
165 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512-, 256- and 128-bit X86VectorVTInfo variants of a single
// element type, so a multiclass that defines VL (vector-length) forms can
// take one argument and select the width it needs via info512/info256/info128.
168 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
175 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
177 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
179 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
181 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
183 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
185 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Mask-register analogue of X86VectorVTInfo: records the plain (KRC) and
// write-mask (KRCWM) k-register classes for one mask width.
188 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
// One X86KVectorVTInfo instance per supported mask width (1 to 64 elements),
// pairing each VKn register class with its VKnWM write-mask class.
195 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
196 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
203 // This multiclass generates the masking variants from the non-masking
204 // variant. It only provides the assembly pieces for the masking variants.
205 // It assumes custom ISel patterns for masking which can be provided as
206 // template arguments.
207 multiclass AVX512_maskable_custom<bits<8> O, Format F,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
211 string AttSrcAsm, string IntelSrcAsm,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
216 string MaskingConstraint = "",
217 bit IsCommutable = 0,
218 bit IsKCommutable = 0> {
219 let isCommutable = IsCommutable in
220 def NAME: AVX512<O, F, Outs, Ins,
221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222 "$dst, "#IntelSrcAsm#"}",
225 // Prefer over VMOV*rrk Pat<>
226 let isCommutable = IsKCommutable in
227 def NAME#k: AVX512<O, F, Outs, MaskingIns,
228 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229 "$dst {${mask}}, "#IntelSrcAsm#"}",
230 MaskingPattern, itin>,
232 // In case of the 3src subclass this is overridden with a let.
233 string Constraints = MaskingConstraint;
236 // Zero mask does not add any restrictions to commute operands transformation.
237 // So, it is Ok to use IsCommutable instead of IsKCommutable.
238 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
239 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
240 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
248 // Common base class of AVX512_maskable and AVX512_maskable_3src.
249 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
251 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
253 string AttSrcAsm, string IntelSrcAsm,
254 dag RHS, dag MaskingRHS,
256 SDNode Select = vselect,
257 string MaskingConstraint = "",
258 bit IsCommutable = 0,
259 bit IsKCommutable = 0> :
260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
265 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
266 itin, MaskingConstraint, IsCommutable,
269 // This multiclass generates the unconditional/non-masking, the masking and
270 // the zero-masking variant of the vector instruction. In the masking case, the
271 // preserved vector elements come from a new dummy input operand tied to $dst.
272 // This version uses a separate dag for non-masking and masking.
273 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274 dag Outs, dag Ins, string OpcodeStr,
275 string AttSrcAsm, string IntelSrcAsm,
276 dag RHS, dag MaskRHS,
278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
280 AVX512_maskable_custom<O, F, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm,
284 [(set _.RC:$dst, RHS)],
286 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
288 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
289 itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
291 // This multiclass generates the unconditional/non-masking, the masking and
292 // the zero-masking variant of the vector instruction. In the masking case, the
293 // preserved vector elements come from a new dummy input operand tied to $dst.
294 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295 dag Outs, dag Ins, string OpcodeStr,
296 string AttSrcAsm, string IntelSrcAsm,
299 bit IsCommutable = 0, bit IsKCommutable = 0,
300 SDNode Select = vselect> :
301 AVX512_maskable_common<O, F, _, Outs, Ins,
302 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303 !con((ins _.KRCWM:$mask), Ins),
304 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
305 (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
308 // This multiclass generates the unconditional/non-masking, the masking and
309 // the zero-masking variant of the scalar instruction.
310 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311 dag Outs, dag Ins, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
315 bit IsCommutable = 0> :
316 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317 RHS, itin, IsCommutable, 0, X86selects>;
319 // Similar to AVX512_maskable but in this case one of the source operands
320 // ($src1) is already tied to $dst so we just use that for the preserved
321 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
323 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
326 dag RHS, InstrItinClass itin,
327 bit IsCommutable = 0,
328 bit IsKCommutable = 0,
329 SDNode Select = vselect,
331 AVX512_maskable_common<O, F, _, Outs,
332 !con((ins _.RC:$src1), NonTiedIns),
333 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
335 OpcodeStr, AttSrcAsm, IntelSrcAsm,
336 !if(MaskOnly, (null_frag), RHS),
337 (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338 Select, "", IsCommutable, IsKCommutable>;
340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341 dag Outs, dag NonTiedIns, string OpcodeStr,
342 string AttSrcAsm, string IntelSrcAsm,
343 dag RHS, InstrItinClass itin,
344 bit IsCommutable = 0,
345 bit IsKCommutable = 0,
347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
348 IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
349 X86selects, MaskOnly>;
351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
354 string AttSrcAsm, string IntelSrcAsm,
356 InstrItinClass itin> :
357 AVX512_maskable_custom<O, F, Outs, Ins,
358 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359 !con((ins _.KRCWM:$mask), Ins),
360 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
361 itin, "$src0 = $dst">;
364 // Instruction with mask that puts result in mask register,
365 // like "compare" and "vptest"
366 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
368 dag Ins, dag MaskingIns,
370 string AttSrcAsm, string IntelSrcAsm,
372 list<dag> MaskingPattern,
374 bit IsCommutable = 0> {
375 let isCommutable = IsCommutable in
376 def NAME: AVX512<O, F, Outs, Ins,
377 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378 "$dst, "#IntelSrcAsm#"}",
381 def NAME#k: AVX512<O, F, Outs, MaskingIns,
382 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383 "$dst {${mask}}, "#IntelSrcAsm#"}",
384 MaskingPattern, itin>, EVEX_K;
387 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
389 dag Ins, dag MaskingIns,
391 string AttSrcAsm, string IntelSrcAsm,
392 dag RHS, dag MaskingRHS,
394 bit IsCommutable = 0> :
395 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396 AttSrcAsm, IntelSrcAsm,
397 [(set _.KRC:$dst, RHS)],
398 [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
// Generates the unmasked and masked variants of a compare-like instruction
// whose destination is a mask register (KRC). The masked pattern models the
// hardware behavior by ANDing the compare result RHS with the input mask, so
// lanes disabled by $mask come out as 0 in the result mask.
400 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401 dag Outs, dag Ins, string OpcodeStr,
402 string AttSrcAsm, string IntelSrcAsm,
403 dag RHS, InstrItinClass itin,
404 bit IsCommutable = 0> :
405 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
// Masked ins = the unmasked ins prefixed with the write-mask operand.
406 !con((ins _.KRCWM:$mask), Ins),
407 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
408 (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
// Assembler-only counterpart of AVX512_maskable_cmp: generates the same
// unmasked/masked instruction forms but with empty pattern lists, so no ISel
// patterns are attached here (selection is provided elsewhere, e.g. by
// custom patterns or only via the assembler).
410 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411 dag Outs, dag Ins, string OpcodeStr,
412 string AttSrcAsm, string IntelSrcAsm,
413 InstrItinClass itin> :
414 AVX512_maskable_custom_cmp<O, F, Outs,
415 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
416 AttSrcAsm, IntelSrcAsm, [],[], itin>;
418 // This multiclass generates the unconditional/non-masking, the masking and
419 // the zero-masking variant of the vector instruction. In the masking case, the
420 // preserved vector elements come from a new dummy input operand tied to $dst.
421 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Outs, dag Ins, string OpcodeStr,
423 string AttSrcAsm, string IntelSrcAsm,
424 dag RHS, dag MaskedRHS,
426 bit IsCommutable = 0, SDNode Select = vselect> :
427 AVX512_maskable_custom<O, F, Outs, Ins,
428 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429 !con((ins _.KRCWM:$mask), Ins),
430 OpcodeStr, AttSrcAsm, IntelSrcAsm,
431 [(set _.RC:$dst, RHS)],
433 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
435 (Select _.KRCWM:$mask, MaskedRHS,
437 itin, "$src0 = $dst", IsCommutable>;
440 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
442 // swizzled by ExecutionDepsFix to pxor.
443 // We set canFoldAsLoad because this can be converted to a constant-pool
444 // load of an all-zeros value if folding it would be beneficial.
445 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
446 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
447 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
448 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
449 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
453 // Alias instructions that allow VPTERNLOG to be used with a mask to create
454 // a mix of all ones and all zeros elements. This is done this way to force
455 // the same register to be used as input for all three sources.
456 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
457 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458 (ins VK16WM:$mask), "",
459 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460 (v16i32 immAllOnesV),
461 (v16i32 immAllZerosV)))]>;
462 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463 (ins VK8WM:$mask), "",
464 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465 (bc_v8i64 (v16i32 immAllOnesV)),
466 (bc_v8i64 (v16i32 immAllZerosV))))]>;
469 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
470 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
471 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
477 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478 // This is expanded by ExpandPostRAPseudos.
479 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
480 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
481 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482 [(set FR32X:$dst, fp32imm0)]>;
483 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484 [(set FR64X:$dst, fpimm0)]>;
487 //===----------------------------------------------------------------------===//
488 // AVX-512 - VECTOR INSERT
491 // Supports two different pattern operators for mask and unmasked ops. Allows
492 // null_frag to be passed for one.
493 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
495 SDPatternOperator vinsert_insert,
496 SDPatternOperator vinsert_for_mask,
498 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
499 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
500 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
501 "vinsert" # From.EltTypeName # "x" # From.NumElts,
502 "$src3, $src2, $src1", "$src1, $src2, $src3",
503 (vinsert_insert:$src3 (To.VT To.RC:$src1),
504 (From.VT From.RC:$src2),
506 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507 (From.VT From.RC:$src2),
508 (iPTR imm)), itins.rr>,
509 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
511 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
512 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
513 "vinsert" # From.EltTypeName # "x" # From.NumElts,
514 "$src3, $src2, $src1", "$src1, $src2, $src3",
515 (vinsert_insert:$src3 (To.VT To.RC:$src1),
516 (From.VT (bitconvert (From.LdFrag addr:$src2))),
518 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519 (From.VT (bitconvert (From.LdFrag addr:$src2))),
520 (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522 Sched<[itins.Sched.Folded, ReadAfterLd]>;
526 // Passes the same pattern operator for masked and unmasked ops.
527 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
529 SDPatternOperator vinsert_insert,
531 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
533 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534 X86VectorVTInfo To, PatFrag vinsert_insert,
535 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536 let Predicates = p in {
537 def : Pat<(vinsert_insert:$ins
538 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539 (To.VT (!cast<Instruction>(InstrStr#"rr")
540 To.RC:$src1, From.RC:$src2,
541 (INSERT_get_vinsert_imm To.RC:$ins)))>;
543 def : Pat<(vinsert_insert:$ins
545 (From.VT (bitconvert (From.LdFrag addr:$src2))),
547 (To.VT (!cast<Instruction>(InstrStr#"rm")
548 To.RC:$src1, addr:$src2,
549 (INSERT_get_vinsert_imm To.RC:$ins)))>;
553 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
554 ValueType EltVT64, int Opcode256,
557 let Predicates = [HasVLX] in
558 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
559 X86VectorVTInfo< 4, EltVT32, VR128X>,
560 X86VectorVTInfo< 8, EltVT32, VR256X>,
561 vinsert128_insert, itins>, EVEX_V256;
563 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
564 X86VectorVTInfo< 4, EltVT32, VR128X>,
565 X86VectorVTInfo<16, EltVT32, VR512>,
566 vinsert128_insert, itins>, EVEX_V512;
568 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
569 X86VectorVTInfo< 4, EltVT64, VR256X>,
570 X86VectorVTInfo< 8, EltVT64, VR512>,
571 vinsert256_insert, itins>, VEX_W, EVEX_V512;
573 // Even with DQI we'd like to only use these instructions for masking.
574 let Predicates = [HasVLX, HasDQI] in
575 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
576 X86VectorVTInfo< 2, EltVT64, VR128X>,
577 X86VectorVTInfo< 4, EltVT64, VR256X>,
578 null_frag, vinsert128_insert, itins>,
581 // Even with DQI we'd like to only use these instructions for masking.
582 let Predicates = [HasDQI] in {
583 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
584 X86VectorVTInfo< 2, EltVT64, VR128X>,
585 X86VectorVTInfo< 8, EltVT64, VR512>,
586 null_frag, vinsert128_insert, itins>,
589 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
590 X86VectorVTInfo< 8, EltVT32, VR256X>,
591 X86VectorVTInfo<16, EltVT32, VR512>,
592 null_frag, vinsert256_insert, itins>,
597 // FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
598 let Sched = WriteFShuffle256 in
599 def AVX512_VINSERTF : OpndItins<
600 IIC_SSE_SHUFP, IIC_SSE_SHUFP
602 let Sched = WriteShuffle256 in
603 def AVX512_VINSERTI : OpndItins<
604 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
607 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
608 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
610 // Codegen pattern with the alternative types,
611 // Even with AVX512DQ we'll still use these for unmasked operations.
612 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
613 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
614 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
615 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
617 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
618 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
619 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
620 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
622 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
623 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
624 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
625 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
627 // Codegen pattern with the alternative types insert VEC128 into VEC256
628 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
629 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
630 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
631 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
632 // Codegen pattern with the alternative types insert VEC128 into VEC512
633 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
634 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
635 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
636 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
637 // Codegen pattern with the alternative types insert VEC256 into VEC512
638 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
639 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
640 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
641 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
644 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
645 X86VectorVTInfo To, X86VectorVTInfo Cast,
646 PatFrag vinsert_insert,
647 SDNodeXForm INSERT_get_vinsert_imm,
649 let Predicates = p in {
651 (vselect Cast.KRCWM:$mask,
653 (vinsert_insert:$ins (To.VT To.RC:$src1),
654 (From.VT From.RC:$src2),
657 (!cast<Instruction>(InstrStr#"rrk")
658 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659 (INSERT_get_vinsert_imm To.RC:$ins))>;
661 (vselect Cast.KRCWM:$mask,
663 (vinsert_insert:$ins (To.VT To.RC:$src1),
666 (From.LdFrag addr:$src2))),
669 (!cast<Instruction>(InstrStr#"rmk")
670 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
671 (INSERT_get_vinsert_imm To.RC:$ins))>;
674 (vselect Cast.KRCWM:$mask,
676 (vinsert_insert:$ins (To.VT To.RC:$src1),
677 (From.VT From.RC:$src2),
680 (!cast<Instruction>(InstrStr#"rrkz")
681 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
682 (INSERT_get_vinsert_imm To.RC:$ins))>;
684 (vselect Cast.KRCWM:$mask,
686 (vinsert_insert:$ins (To.VT To.RC:$src1),
689 (From.LdFrag addr:$src2))),
692 (!cast<Instruction>(InstrStr#"rmkz")
693 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
694 (INSERT_get_vinsert_imm To.RC:$ins))>;
698 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
699 v8f32x_info, vinsert128_insert,
700 INSERT_get_vinsert128_imm, [HasVLX]>;
701 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
702 v4f64x_info, vinsert128_insert,
703 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
705 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
706 v8i32x_info, vinsert128_insert,
707 INSERT_get_vinsert128_imm, [HasVLX]>;
708 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
709 v8i32x_info, vinsert128_insert,
710 INSERT_get_vinsert128_imm, [HasVLX]>;
711 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
712 v8i32x_info, vinsert128_insert,
713 INSERT_get_vinsert128_imm, [HasVLX]>;
714 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
715 v4i64x_info, vinsert128_insert,
716 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
717 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
718 v4i64x_info, vinsert128_insert,
719 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
720 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
721 v4i64x_info, vinsert128_insert,
722 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
724 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
725 v16f32_info, vinsert128_insert,
726 INSERT_get_vinsert128_imm, [HasAVX512]>;
727 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
728 v8f64_info, vinsert128_insert,
729 INSERT_get_vinsert128_imm, [HasDQI]>;
731 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
732 v16i32_info, vinsert128_insert,
733 INSERT_get_vinsert128_imm, [HasAVX512]>;
734 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
735 v16i32_info, vinsert128_insert,
736 INSERT_get_vinsert128_imm, [HasAVX512]>;
737 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
738 v16i32_info, vinsert128_insert,
739 INSERT_get_vinsert128_imm, [HasAVX512]>;
740 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
741 v8i64_info, vinsert128_insert,
742 INSERT_get_vinsert128_imm, [HasDQI]>;
743 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
744 v8i64_info, vinsert128_insert,
745 INSERT_get_vinsert128_imm, [HasDQI]>;
746 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
747 v8i64_info, vinsert128_insert,
748 INSERT_get_vinsert128_imm, [HasDQI]>;
750 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
751 v16f32_info, vinsert256_insert,
752 INSERT_get_vinsert256_imm, [HasDQI]>;
753 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
754 v8f64_info, vinsert256_insert,
755 INSERT_get_vinsert256_imm, [HasAVX512]>;
757 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
758 v16i32_info, vinsert256_insert,
759 INSERT_get_vinsert256_imm, [HasDQI]>;
760 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
761 v16i32_info, vinsert256_insert,
762 INSERT_get_vinsert256_imm, [HasDQI]>;
763 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
764 v16i32_info, vinsert256_insert,
765 INSERT_get_vinsert256_imm, [HasDQI]>;
766 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
767 v8i64_info, vinsert256_insert,
768 INSERT_get_vinsert256_imm, [HasAVX512]>;
769 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
770 v8i64_info, vinsert256_insert,
771 INSERT_get_vinsert256_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
773 v8i64_info, vinsert256_insert,
774 INSERT_get_vinsert256_imm, [HasAVX512]>;
776 // vinsertps - insert f32 to XMM
// VINSERTPS (EVEX-encoded, opcode 0x21): insert one f32 element into an XMM
// register; the $src3 immediate selects the source element and destination
// lane (and any zeroed lanes), modeled by the X86insertps node.
777 let ExeDomain = SSEPackedSingle in {
// Register-register form.
778 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
779 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
780 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
781 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
782 IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>;
// Memory form: loads a scalar f32, widens it via scalar_to_vector, then
// performs the same insert. EVEX_CD8<32, CD8VT1> sets the compressed
// displacement scaling for the 4-byte memory operand.
783 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
784 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
785 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
786 [(set VR128X:$dst, (X86insertps VR128X:$src1,
787 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
788 imm:$src3))], IIC_SSE_INSERTPS_RM>, EVEX_4V,
789 EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//===----------------------------------------------------------------------===//

// Supports two different pattern operators for mask and unmasked ops. Allows
// null_frag to be passed for one.
multiclass vextract_for_size_split<int Opcode,
                                   X86VectorVTInfo From, X86VectorVTInfo To,
                                   SDPatternOperator vextract_extract,
                                   SDPatternOperator vextract_for_mask,
                                   OpndItins itins> {
  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
    // Register-destination form; masked variants come from the maskable class.
    defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
                (ins From.RC:$src1, u8imm:$idx),
                "vextract" # To.EltTypeName # "x" # To.NumElts,
                "$idx, $src1", "$src1, $idx",
                (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
                (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
                itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;

    // Unmasked store form.
    def mr  : AVX512AIi8<Opcode, MRMDestMem, (outs),
                    (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
                    "vextract" # To.EltTypeName # "x" # To.NumElts #
                        "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
                    [(store (To.VT (vextract_extract:$idx
                                    (From.VT From.RC:$src1), (iPTR imm))),
                            addr:$dst)], itins.rm>, EVEX,
                    Sched<[itins.Sched.Folded, ReadAfterLd]>;

    // Masked store form; no pattern — selected by custom code.
    let mayStore = 1, hasSideEffects = 0 in
    def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
                     (ins To.MemOp:$dst, To.KRCWM:$mask,
                          From.RC:$src1, u8imm:$idx),
                     "vextract" # To.EltTypeName # "x" # To.NumElts #
                          "\t{$idx, $src1, $dst {${mask}}|"
                          "$dst {${mask}}, $src1, $idx}",
                     [], itins.rm>, EVEX_K, EVEX,
                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
  }
}
// Passes the same pattern operator for masked and unmasked ops.
multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
                             X86VectorVTInfo To,
                             SDPatternOperator vextract_extract,
                             OpndItins itins> :
  vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, itins>;
// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
                X86VectorVTInfo To, PatFrag vextract_extract,
                SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
  let Predicates = p in {
    // Register form: select the named instruction for the extract node.
    def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
              (To.VT (!cast<Instruction>(InstrStr#"rr")
                        From.RC:$src1,
                        (EXTRACT_get_vextract_imm To.RC:$ext)))>;
    // Store form: fold the extract directly into a memory destination.
    def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
                             (iPTR imm))), addr:$dst),
              (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
               (EXTRACT_get_vextract_imm To.RC:$ext))>;
  }
}
// Instantiates all subvector-extract variants for one 32-bit/64-bit element
// type pair (e.g. f32/f64 or i32/i64).
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
                             ValueType EltVT64, int Opcode256,
                             OpndItins itins> {
  let Predicates = [HasAVX512] in {
    defm NAME # "32x4Z" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, itins>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT4>;
    defm NAME # "64x4Z" : vextract_for_size<Opcode256,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 vextract256_extract, itins>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
  }
  let Predicates = [HasVLX] in
    defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 X86VectorVTInfo< 4, EltVT32, VR128X>,
                                 vextract128_extract, itins>,
                                     EVEX_V256, EVEX_CD8<32, CD8VT4>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasVLX, HasDQI] in
    defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 4, EltVT64, VR256X>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, itins>,
                                     VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;

  // Even with DQI we'd like to only use these instructions for masking.
  let Predicates = [HasDQI] in {
    defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
                                 X86VectorVTInfo< 8, EltVT64, VR512>,
                                 X86VectorVTInfo< 2, EltVT64, VR128X>,
                                 null_frag, vextract128_extract, itins>,
                                     VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
    defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
                                 X86VectorVTInfo<16, EltVT32, VR512>,
                                 X86VectorVTInfo< 8, EltVT32, VR256X>,
                                 null_frag, vextract256_extract, itins>,
                                     EVEX_V512, EVEX_CD8<32, CD8VT8>;
  }
}
// FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
let Sched = WriteFShuffle256 in
def AVX512_VEXTRACTF : OpndItins<
  IIC_SSE_SHUFP, IIC_SSE_SHUFP
>;
let Sched = WriteShuffle256 in
def AVX512_VEXTRACTI : OpndItins<
  IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
>;

defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
// extract_subvector codegen patterns with the alternative types.
// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;

// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
          vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
          vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [NoVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI128rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF128rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI128rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF128rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI128rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI128rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
// smaller extract to enable EVEX->VEX.
let Predicates = [HasVLX] in {
def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
          (v2i64 (VEXTRACTI32x4Z256rr
                  (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
          (v2f64 (VEXTRACTF32x4Z256rr
                  (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
          (v4i32 (VEXTRACTI32x4Z256rr
                  (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
          (v4f32 (VEXTRACTF32x4Z256rr
                  (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
          (v8i16 (VEXTRACTI32x4Z256rr
                  (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
          (v16i8 (VEXTRACTI32x4Z256rr
                  (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
                  (iPTR 1)))>;
}
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
                                  X86VectorVTInfo To, X86VectorVTInfo Cast,
                                  PatFrag vextract_extract,
                                  SDNodeXForm EXTRACT_get_vextract_imm,
                                  list<Predicate> p> {
let Predicates = p in {
  // Merge-masking form: select $src0 elements where the mask bit is clear.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.RC:$src0)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
                      Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;

  // Zero-masking form: zero elements where the mask bit is clear.
  def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
                              (bitconvert
                               (To.VT (vextract_extract:$ext
                                       (From.VT From.RC:$src), (iPTR imm)))),
                              Cast.ImmAllZerosV)),
            (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
                      Cast.KRCWM:$mask, From.RC:$src,
                      (EXTRACT_get_vextract_imm To.RC:$ext)))>;
}
}
defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
                              v4f32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
                              v2f64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
                              v4i32x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
                              v2i64x_info, vextract128_extract,
                              EXTRACT_get_vextract128_imm, [HasDQI]>;

defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
                              v8f32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
                              v4f64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;

defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
                              v8i32x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasDQI]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
                              v4i64x_info, vextract256_extract,
                              EXTRACT_get_vextract256_imm, [HasAVX512]>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
      (ins VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
      IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;

// Memory-destination form: stores the selected element directly.
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
      (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
      "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
              addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
      EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//===---------------------------------------------------------------------===//

// broadcast with a scalar argument.
multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
                            X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
  // Unmasked broadcast from an FR32/FR64 register.
  def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Merge-masked broadcast.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
             DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
             (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
  // Zero-masked broadcast.
  def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
                                  (X86VBroadcast SrcInfo.FRC:$src),
                                  DestInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
             DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
}
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
// NOTE(review): reconstructed from a truncated extraction — the
// (MaskInfo.VT (bitconvert (DestInfo.VT ...))) nesting lines were dropped in
// the source; confirm against upstream before relying on exact pattern shape.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
                                     SchedWrite SchedRR, SchedWrite SchedRM,
                                     X86VectorVTInfo MaskInfo,
                                     X86VectorVTInfo DestInfo,
                                     X86VectorVTInfo SrcInfo,
                                     SDPatternOperator UnmaskedOp = X86VBroadcast> {
  let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
  // Register source form; the mask operates on MaskInfo's type while the
  // broadcast itself produces DestInfo's type.
  defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT
                      (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
                   NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
  // Scalar-memory source form.
  let mayLoad = 1 in
  defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
                   (outs MaskInfo.RC:$dst),
                   (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (UnmaskedOp
                      (SrcInfo.ScalarLdFrag addr:$src))))),
                   (MaskInfo.VT
                    (bitconvert
                     (DestInfo.VT (X86VBroadcast
                      (SrcInfo.ScalarLdFrag addr:$src))))),
                   NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
                   Sched<[SchedRM]>;
  }

  // Fold a scalar_to_vector of a scalar load into the memory form.
  def : Pat<(MaskInfo.VT
             (bitconvert
              (DestInfo.VT (UnmaskedOp
               (SrcInfo.VT (scalar_to_vector
                            (SrcInfo.ScalarLdFrag addr:$src))))))),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.RC:$src0)),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
             MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
  def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
                          (bitconvert
                           (DestInfo.VT
                            (X86VBroadcast
                             (SrcInfo.VT (scalar_to_vector
                                          (SrcInfo.ScalarLdFrag addr:$src)))))),
                          MaskInfo.ImmAllZerosV)),
            (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
             MaskInfo.KRCWM:$mask, addr:$src)>;
}
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
                               SchedWrite SchedRR, SchedWrite SchedRM,
                               X86VectorVTInfo DestInfo,
                               X86VectorVTInfo SrcInfo> :
  avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
                            DestInfo, DestInfo, SrcInfo>;
// FP double broadcast: 512-bit always, 256-bit under VLX (no 128-bit form
// for vbroadcastsd).
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                 EVEX_V256;
  }
}
// FP single broadcast: 512-bit always, 256-bit and 128-bit under VLX.
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
                                  AVX512VLVectorVTInfo _> {
  let Predicates = [HasAVX512] in
  defm Z    : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                  WriteFShuffle256Ld, _.info512, _.info128>,
              avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
              EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info256, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
                 EVEX_V256;
    defm Z128  : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
                                     WriteFShuffle256Ld, _.info128, _.info128>,
                 avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
                 EVEX_V128;
  }
}
defm VBROADCASTSS  : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
                                            avx512vl_f32_info>;
defm VBROADCASTSD  : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
                                            avx512vl_f64_info>, VEX_W;

// Select the memory form for the 512-bit broadcast intrinsics.
def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
          (VBROADCASTSSZm addr:$src)>;
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
          (VBROADCASTSDZm addr:$src)>;
// Integer broadcast from a GPR source register.
multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
                                    X86VectorVTInfo _, SDPatternOperator OpNode,
                                    RegisterClass SrcRC> {
  let ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                           (ins SrcRC:$src),
                           "vpbroadcast"##_.Suffix, "$src", "$src",
                           (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
                           Sched<[SchedRR]>;
}
// Byte/word GPR broadcast. The instruction consumes a GR32; patterns insert
// the narrow GR8/GR16 source into an undef GR32 via the given subregister.
multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
                                      X86VectorVTInfo _, SDPatternOperator OpNode,
                                      RegisterClass SrcRC, SubRegIndex Subreg> {
  let hasSideEffects = 0, ExeDomain = _.ExeDomain in
  defm r : AVX512_maskable_custom<opc, MRMSrcReg,
                                  (outs _.RC:$dst), (ins GR32:$src),
                                  !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
                                  !con((ins _.KRCWM:$mask), (ins GR32:$src)),
                                  "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
                                  NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;

  // Unmasked broadcast.
  def : Pat <(_.VT (OpNode SrcRC:$src)),
             (!cast<Instruction>(Name#r)
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Merge-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
             (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;

  // Zero-masked broadcast.
  def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
             (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
              (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
}
// Instantiates the byte/word GPR broadcast at all vector lengths.
multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
                      AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
                      RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
                                        OpNode, SrcRC, Subreg>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
                                           _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
    defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
                                           _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
  }
}
// Instantiates the dword/qword GPR broadcast at all vector lengths.
multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
                                       SDPatternOperator OpNode,
                                       RegisterClass SrcRC, Predicate prd> {
  let Predicates = [prd] in
    defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
                                      SrcRC>, EVEX_V512;
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
                                         SrcRC>, EVEX_V256;
    defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
                                         SrcRC>, EVEX_V128;
  }
}
defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
                       avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
                       avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
                       HasBWI>;
// Note: D and Q share opcode 0x7C; Q is distinguished by VEX_W.
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
                                                 X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
                                                 X86VBroadcast, GR64, HasAVX512>, VEX_W;

// Zero-extend a mask into a vector of 0/1 elements by broadcasting 1 under
// a zero-masking broadcast.
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
           (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
           (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
                                            X86VectorVTInfo SrcInfo> {
  def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
            (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
             (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
}
// Integer element broadcast at all vector lengths, plus the lowering
// aliases that extract the low xmm from wider sources.
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
                                      AVX512VLVectorVTInfo _, Predicate prd> {
  let Predicates = [prd] in {
    defm Z :   avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                   WriteShuffle256Ld, _.info512, _.info128>,
               avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
                                  EVEX_V512;
    // Defined separately to avoid redefinition.
    defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
  }
  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
                                    WriteShuffle256Ld, _.info256, _.info128>,
                avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
                                 EVEX_V256;
    defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
                                    WriteShuffleLd, _.info128, _.info128>,
                                 EVEX_V128;
  }
}
defm VPBROADCASTB  : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
                                           avx512vl_i8_info, HasBWI>;
defm VPBROADCASTW  : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
                                           avx512vl_i16_info, HasBWI>;
defm VPBROADCASTD  : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
                                           avx512vl_i32_info, HasAVX512>;
defm VPBROADCASTQ  : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
                                           avx512vl_i64_info, HasAVX512>, VEX_W;
// Broadcast of a subvector loaded from memory.
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is desired.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
                           X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
  let hasSideEffects = 0, mayLoad = 1 in
  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
                           (null_frag),
                           (_Dst.VT (X86SubVBroadcast
                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
                           NoItinerary>, AVX5128IBase, EVEX,
                           Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX512] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZm addr:$src)>;
}

let Predicates = [HasVLX] in {
  // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
  def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ128m addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
            (VPBROADCASTQZ256m addr:$src)>;
}
let Predicates = [HasVLX, HasBWI] in {
  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
  // This means we'll encounter truncated i32 loads; match that here.
  def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast
                    (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ128m addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast
                     (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
            (VPBROADCASTWZ256m addr:$src)>;
}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
//===----------------------------------------------------------------------===//

defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                       v16i32_info, v4i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                       v16f32_info, v4f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
                       v8i64_info, v4i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
                       v8f64_info, v4f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
          (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
          (VBROADCASTI64X4rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v16i16 VR256X:$src), 1)>;
def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
          (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                           (v32i8 VR256X:$src), 1)>;

def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4rm addr:$src)>;
}
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                           v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                           v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                              (v16i8 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                           v4i64x_info, v2i64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                           v4f64x_info, v2f64x_info>, VEX_W,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
}

let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                       v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                       v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                       v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                       v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
// 32x2 broadcasts (AVX512DQ): 512-bit and 256-bit forms.
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z :    avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info512,
                                          _Src.info512, _Src.info128, null_frag>,
                                          EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, null_frag>,
                                          EVEX_V256;
}
// Integer variant additionally provides a 128-bit form.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                      AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
    avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
                                          WriteShuffleLd, _Dst.info128,
                                          _Src.info128, _Src.info128, null_frag>,
                                          EVEX_V128;
}

defm VBROADCASTI32X2  : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                          avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2  : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                          avx512vl_f32_info, avx512vl_f64_info>;
1588 let Predicates = [HasVLX] in {
1589 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1590 (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1591 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1592 (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1595 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1596 (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1597 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1598 (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1600 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1601 (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1602 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1603 (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1605 //===----------------------------------------------------------------------===//
1606 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Single register-to-register form: materialize a mask register (KRC) into a
// vector register via the X86VBroadcastm node. No memory form exists.
1608 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1609 X86VectorVTInfo _, RegisterClass KRC> {
1610 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1611 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1612 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
1613 IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
// Instantiate all three vector lengths: 512-bit requires CDI, the 256/128-bit
// forms additionally require VLX.
1616 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1617 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1618 let Predicates = [HasCDI] in
1619 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1620 let Predicates = [HasCDI, HasVLX] in {
1621 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1622 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
1626 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1627 avx512vl_i32_info, VK16>;
1628 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1629 avx512vl_i64_info, VK8>, VEX_W;
1631 //===----------------------------------------------------------------------===//
1632 // -- VPERMI2 - 3 source operands form --
// Itineraries for the FP and integer two-source permutes, with the matching
// shuffle scheduling classes.
1634 let Sched = WriteFShuffle256 in
1635 def AVX512_PERM2_F : OpndItins<
1636 IIC_SSE_SHUFP, IIC_SSE_SHUFP
1639 let Sched = WriteShuffle256 in
1640 def AVX512_PERM2_I : OpndItins<
1641 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
// VPERMI2*: $src1 holds the index vector and is also the destination
// (tied via the "$src1 = $dst" constraint); overwritten with the result.
1644 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
1645 X86VectorVTInfo _> {
1646 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1647 // The index operand in the pattern should really be an integer type. However,
1648 // if we do that and it happens to come from a bitcast, then it becomes
1649 // difficult to find the bitcast needed to convert the index to the
1650 // destination type for the passthru since it will be folded with the bitcast
1651 // of the index operand.
1652 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1653 (ins _.RC:$src2, _.RC:$src3),
1654 OpcodeStr, "$src3, $src2", "$src2, $src3",
1655 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
1656 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1658 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1659 (ins _.RC:$src2, _.MemOp:$src3),
1660 OpcodeStr, "$src3, $src2", "$src2, $src3",
1661 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
1662 (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
1663 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-from-memory (EVEX.b) form of VPERMI2*.
1667 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1668 X86VectorVTInfo _> {
1669 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1670 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1671 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1672 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1673 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1674 (_.VT (X86VPermi2X _.RC:$src1,
1675 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1676 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1677 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All three vector lengths with broadcast forms (dword/qword element sizes).
1680 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1681 AVX512VLVectorVTInfo VTInfo> {
1682 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
1683 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1684 let Predicates = [HasVLX] in {
1685 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
1686 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1687 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
1688 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
// Byte/word variants: no broadcast forms, gated by an extra predicate
// (HasBWI for words, HasVBMI for bytes).
1692 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1694 AVX512VLVectorVTInfo VTInfo,
1696 let Predicates = [Prd] in
1697 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1698 let Predicates = [Prd, HasVLX] in {
1699 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1700 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1704 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
1705 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1706 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
1707 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1708 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
1709 avx512vl_i16_info, HasBWI>,
1710 VEX_W, EVEX_CD8<16, CD8VF>;
1711 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
1712 avx512vl_i8_info, HasVBMI>,
1714 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
1715 avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
1716 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
1717 avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2*: like VPERMI2 but the tied $src1 operand holds table data while
// $src2 carries the indices (separate IdxVT info class).
1720 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
1721 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1723 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1724 (ins IdxVT.RC:$src2, _.RC:$src3),
1725 OpcodeStr, "$src3, $src2", "$src2, $src3",
1726 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
1727 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
1729 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1730 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1731 OpcodeStr, "$src3, $src2", "$src2, $src3",
1732 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1733 (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
1734 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-from-memory (EVEX.b) form of VPERMT2*.
1737 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1738 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1739 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1740 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1741 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1742 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1743 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1744 (_.VT (X86VPermt2 _.RC:$src1,
1745 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1746 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1747 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All three vector lengths with broadcast forms; ShuffleMask supplies the
// index-vector type info independent of the data type info.
1750 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1751 AVX512VLVectorVTInfo VTInfo,
1752 AVX512VLVectorVTInfo ShuffleMask> {
1753 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1754 ShuffleMask.info512>,
1755 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
1756 ShuffleMask.info512>, EVEX_V512;
1757 let Predicates = [HasVLX] in {
1758 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1759 ShuffleMask.info128>,
1760 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
1761 ShuffleMask.info128>, EVEX_V128;
1762 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1763 ShuffleMask.info256>,
1764 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
1765 ShuffleMask.info256>, EVEX_V256;
// Byte/word variants: no broadcast forms, gated on HasBWI / HasVBMI.
1769 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
1770 AVX512VLVectorVTInfo VTInfo,
1771 AVX512VLVectorVTInfo Idx,
1773 let Predicates = [Prd] in
1774 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1775 Idx.info512>, EVEX_V512;
1776 let Predicates = [Prd, HasVLX] in {
1777 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1778 Idx.info128>, EVEX_V128;
1779 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1780 Idx.info256>, EVEX_V256;
1784 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
1785 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
1787 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1788 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
1789 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1790 VEX_W, EVEX_CD8<16, CD8VF>;
1791 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
1792 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1794 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
1795 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1796 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
1797 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1799 //===----------------------------------------------------------------------===//
1800 // AVX-512 - BLEND using mask
// Itineraries for the FP and integer masked blends.
1803 let Sched = WriteFVarBlend in
1804 def AVX512_BLENDM : OpndItins<
1805 IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
1808 let Sched = WriteVarBlend in
1809 def AVX512_PBLENDM : OpndItins<
1810 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
// Masked blend: all defs carry empty pattern lists (selection is handled
// elsewhere), hence hasSideEffects = 0. rr/rrk/rrkz are register forms, and
// rm/rmk/rmkz the memory forms, in plain / merge-masked / zero-masked flavors.
1813 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
1814 X86VectorVTInfo _> {
1815 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1816 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1817 (ins _.RC:$src1, _.RC:$src2),
1818 !strconcat(OpcodeStr,
1819 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1820 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1821 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1822 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1823 !strconcat(OpcodeStr,
1824 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1825 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
1826 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1827 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1828 !strconcat(OpcodeStr,
1829 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1830 [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
1831 let mayLoad = 1 in {
1832 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833 (ins _.RC:$src1, _.MemOp:$src2),
1834 !strconcat(OpcodeStr,
1835 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1836 [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1837 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1838 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840 !strconcat(OpcodeStr,
1841 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1842 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1843 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1844 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846 !strconcat(OpcodeStr,
1847 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1848 [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1849 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-from-memory (EVEX.b) blend forms, masked and unmasked.
1853 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
1854 X86VectorVTInfo _> {
1855 let mayLoad = 1, hasSideEffects = 0 in {
1856 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1857 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1858 !strconcat(OpcodeStr,
1859 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1860 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1861 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1862 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1864 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1865 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1866 !strconcat(OpcodeStr,
1867 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1868 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1869 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1870 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Dword/qword blends: all lengths plus broadcast forms (VLX for 128/256-bit).
1874 multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
1875 AVX512VLVectorVTInfo VTInfo> {
1876 defm Z : avx512_blendmask <opc, OpcodeStr, itins, VTInfo.info512>,
1877 avx512_blendmask_rmb <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1879 let Predicates = [HasVLX] in {
1880 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
1881 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1882 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
1883 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
// Byte/word blends: no broadcast forms, require BWI (plus VLX for 128/256).
1887 multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
1888 AVX512VLVectorVTInfo VTInfo> {
1889 let Predicates = [HasBWI] in
1890 defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1892 let Predicates = [HasBWI, HasVLX] in {
1893 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1894 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1899 defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
1900 defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
1901 defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
1902 defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
1903 defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
1904 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
1907 //===----------------------------------------------------------------------===//
1908 // Compare Instructions
1909 //===----------------------------------------------------------------------===//
1911 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compare producing a mask register. Provides intrinsic (rr/rm),
// SAE (rrb), asm-parser-only explicit-immediate (*_alt), and isCodeGenOnly
// FRC register forms. ${cc} is the condition-code mnemonic suffix.
1913 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
1915 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1917 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1918 "vcmp${cc}"#_.Suffix,
1919 "$src2, $src1", "$src1, $src2",
1920 (OpNode (_.VT _.RC:$src1),
1922 imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1924 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1926 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
1927 "vcmp${cc}"#_.Suffix,
1928 "$src2, $src1", "$src1, $src2",
1929 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1930 imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1931 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Suppress-all-exceptions form: EVEX.b with {sae}, uses the rounding node.
1933 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1935 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1936 "vcmp${cc}"#_.Suffix,
1937 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
1938 (OpNodeRnd (_.VT _.RC:$src1),
1941 (i32 FROUND_NO_EXC)), itins.rr>,
1942 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1943 // Accept explicit immediate argument form instead of comparison code.
1944 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1945 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1947 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1949 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
1950 Sched<[itins.Sched]>;
1952 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
1954 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1956 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
1957 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1958 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1960 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1962 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1964 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
1965 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1966 }// let isAsmParserOnly = 1, hasSideEffects = 0
// Pattern-selectable forms on plain scalar FP registers (FRC).
1968 let isCodeGenOnly = 1 in {
1969 let isCommutable = 1 in
1970 def rr : AVX512Ii8<0xC2, MRMSrcReg,
1971 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
1972 !strconcat("vcmp${cc}", _.Suffix,
1973 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1974 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1977 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1978 def rm : AVX512Ii8<0xC2, MRMSrcMem,
1980 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
1981 !strconcat("vcmp${cc}", _.Suffix,
1982 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1983 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1984 (_.ScalarLdFrag addr:$src2),
1986 itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1987 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1991 let Predicates = [HasAVX512] in {
1992 let ExeDomain = SSEPackedSingle in
1993 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
1994 SSE_ALU_F32S>, AVX512XSIi8Base;
1995 let ExeDomain = SSEPackedDouble in
1996 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
1997 SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
// Packed integer equality/greater-than compares into a mask register.
// rr/rm are unmasked; rrk/rmk AND the mask operand into the result (the
// masked-compare semantics are expressed as an explicit `and` in the pattern).
2000 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2001 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
2002 let isCommutable = IsCommutable in
2003 def rr : AVX512BI<opc, MRMSrcReg,
2004 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2005 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2006 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
2007 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2008 def rm : AVX512BI<opc, MRMSrcMem,
2009 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2010 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2011 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2012 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
2013 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2014 let isCommutable = IsCommutable in
2015 def rrk : AVX512BI<opc, MRMSrcReg,
2016 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2017 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2018 "$dst {${mask}}, $src1, $src2}"),
2019 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2020 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
2021 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2022 def rmk : AVX512BI<opc, MRMSrcMem,
2023 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2024 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2025 "$dst {${mask}}, $src1, $src2}"),
2026 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2027 (OpNode (_.VT _.RC:$src1),
2029 (_.LdFrag addr:$src2))))))],
2030 itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Adds EVEX.b broadcast-from-memory forms (rmb/rmbk) on top of the above.
2033 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
2034 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
2035 avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
2036 def rmb : AVX512BI<opc, MRMSrcMem,
2037 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2039 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2040 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2041 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
2042 itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2043 def rmbk : AVX512BI<opc, MRMSrcMem,
2044 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2045 _.ScalarMemOp:$src2),
2046 !strconcat(OpcodeStr,
2047 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2048 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2049 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2050 (OpNode (_.VT _.RC:$src1),
2052 (_.ScalarLdFrag addr:$src2)))))],
2053 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2054 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Vector-length wrappers: 512-bit under `prd`, 256/128-bit add HasVLX.
2057 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
2058 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2059 Predicate prd, bit IsCommutable = 0> {
2060 let Predicates = [prd] in
2061 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2062 IsCommutable>, EVEX_V512;
2064 let Predicates = [prd, HasVLX] in {
2065 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2066 IsCommutable>, EVEX_V256;
2067 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2068 IsCommutable>, EVEX_V128;
2072 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2073 SDNode OpNode, OpndItins itins,
2074 AVX512VLVectorVTInfo VTInfo,
2075 Predicate prd, bit IsCommutable = 0> {
2076 let Predicates = [prd] in
2077 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2078 IsCommutable>, EVEX_V512;
2080 let Predicates = [prd, HasVLX] in {
2081 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2082 IsCommutable>, EVEX_V256;
2083 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2084 IsCommutable>, EVEX_V128;
// VPCMPEQ* are commutable; VPCMPGT* are not. Byte/word forms need BWI and
// have no broadcast; dword/qword forms have broadcast and need only AVX512F.
2088 // FIXME: Is there a better scheduler itinerary for VPCMP?
2089 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
2090 SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
2091 EVEX_CD8<8, CD8VF>, VEX_WIG;
2093 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
2094 SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
2095 EVEX_CD8<16, CD8VF>, VEX_WIG;
2097 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
2098 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
2099 EVEX_CD8<32, CD8VF>;
2101 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
2102 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
2103 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2105 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2106 SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
2107 EVEX_CD8<8, CD8VF>, VEX_WIG;
2109 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2110 SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
2111 EVEX_CD8<16, CD8VF>, VEX_WIG;
2113 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2114 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
2115 EVEX_CD8<32, CD8VF>;
2117 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2118 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
2119 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2121 // Transforms to swizzle an immediate to help matching memory operand in first
// SDNodeXForm that swaps the ordered-compare condition code (LT<->NLE,
// LE<->NLT) so a compare with its operands commuted selects the same result.
2123 def CommutePCMPCC : SDNodeXForm<imm, [{
2124 uint8_t Imm = N->getZExtValue() & 0x7;
2126 default: llvm_unreachable("Unreachable!");
2127 case 0x01: Imm = 0x06; break; // LT -> NLE
2128 case 0x02: Imm = 0x05; break; // LE -> NLT
2129 case 0x05: Imm = 0x02; break; // NLT -> LE
2130 case 0x06: Imm = 0x01; break; // NLE -> LT
2137 return getI8Imm(Imm, SDLoc(N));
// VPCMP{cc}: integer compare with explicit condition code immediate.
// rri/rmi plus masked rrik/rmik forms (mask ANDed in the pattern), alt forms
// for the asm parser, and commuted-load patterns using CommutePCMPCC.
2140 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
2141 OpndItins itins, X86VectorVTInfo _> {
2142 let isCommutable = 1 in
2143 def rri : AVX512AIi8<opc, MRMSrcReg,
2144 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2145 !strconcat("vpcmp${cc}", Suffix,
2146 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2147 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2149 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2150 def rmi : AVX512AIi8<opc, MRMSrcMem,
2151 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2152 !strconcat("vpcmp${cc}", Suffix,
2153 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2154 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2155 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2157 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2158 let isCommutable = 1 in
2159 def rrik : AVX512AIi8<opc, MRMSrcReg,
2160 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2162 !strconcat("vpcmp${cc}", Suffix,
2163 "\t{$src2, $src1, $dst {${mask}}|",
2164 "$dst {${mask}}, $src1, $src2}"),
2165 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2166 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2168 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2169 def rmik : AVX512AIi8<opc, MRMSrcMem,
2170 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2172 !strconcat("vpcmp${cc}", Suffix,
2173 "\t{$src2, $src1, $dst {${mask}}|",
2174 "$dst {${mask}}, $src1, $src2}"),
2175 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2176 (OpNode (_.VT _.RC:$src1),
2177 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2179 itins.rm>, EVEX_4V, EVEX_K,
2180 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2182 // Accept explicit immediate argument form instead of comparison code.
2183 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2184 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2185 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2186 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2187 "$dst, $src1, $src2, $cc}"),
2188 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2190 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2191 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2192 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2193 "$dst, $src1, $src2, $cc}"),
2194 [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2195 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2196 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2198 !strconcat("vpcmp", Suffix,
2199 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2200 "$dst {${mask}}, $src1, $src2, $cc}"),
2201 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2203 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2204 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2206 !strconcat("vpcmp", Suffix,
2207 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2208 "$dst {${mask}}, $src1, $src2, $cc}"),
2209 [], itins.rm>, EVEX_4V, EVEX_K,
2210 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Select the memory form when the load is the FIRST operand by commuting
// the condition code.
2213 def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2214 (_.VT _.RC:$src1), imm:$cc),
2215 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2216 (CommutePCMPCC imm:$cc))>;
2218 def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2219 (_.VT _.RC:$src1), imm:$cc)),
2220 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2221 _.RC:$src1, addr:$src2,
2222 (CommutePCMPCC imm:$cc))>;
// Adds EVEX.b broadcast forms (rmib/rmibk), their asm-parser alt variants,
// and commuted-broadcast-load patterns on top of avx512_icmp_cc.
2225 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
2226 OpndItins itins, X86VectorVTInfo _> :
2227 avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
2228 def rmib : AVX512AIi8<opc, MRMSrcMem,
2229 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2231 !strconcat("vpcmp${cc}", Suffix,
2232 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2233 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2234 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2235 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2237 itins.rm>, EVEX_4V, EVEX_B,
2238 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2239 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2240 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2241 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2242 !strconcat("vpcmp${cc}", Suffix,
2243 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2244 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2245 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2246 (OpNode (_.VT _.RC:$src1),
2247 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2249 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2250 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2252 // Accept explicit immediate argument form instead of comparison code.
2253 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2254 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2255 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2257 !strconcat("vpcmp", Suffix,
2258 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2259 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2260 [], itins.rm>, EVEX_4V, EVEX_B,
2261 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2262 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2263 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2264 _.ScalarMemOp:$src2, u8imm:$cc),
2265 !strconcat("vpcmp", Suffix,
2266 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2267 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2268 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2269 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Select the broadcast form when the broadcast load is the FIRST operand by
// commuting the condition code.
2272 def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2273 (_.VT _.RC:$src1), imm:$cc),
2274 (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2275 (CommutePCMPCC imm:$cc))>;
2277 def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2278 (_.ScalarLdFrag addr:$src2)),
2279 (_.VT _.RC:$src1), imm:$cc)),
2280 (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2281 _.RC:$src1, addr:$src2,
2282 (CommutePCMPCC imm:$cc))>;
// Vector-length wrappers: 512-bit under `prd`, 256/128-bit add HasVLX.
2285 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
2286 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2288 let Predicates = [prd] in
2289 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2292 let Predicates = [prd, HasVLX] in {
2293 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2295 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
2300 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
2301 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2303 let Predicates = [prd] in
2304 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
2307 let Predicates = [prd, HasVLX] in {
2308 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
2310 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
// Signed (X86cmpm) and unsigned (X86cmpmu) VPCMP with explicit cc. Byte/word
// variants need BWI and lack broadcast; dword/qword have broadcast forms.
2315 // FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
2316 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2317 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2318 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2319 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2321 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2322 avx512vl_i16_info, HasBWI>,
2323 VEX_W, EVEX_CD8<16, CD8VF>;
2324 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2325 avx512vl_i16_info, HasBWI>,
2326 VEX_W, EVEX_CD8<16, CD8VF>;
2328 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2329 avx512vl_i32_info, HasAVX512>,
2330 EVEX_CD8<32, CD8VF>;
2331 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2332 avx512vl_i32_info, HasAVX512>,
2333 EVEX_CD8<32, CD8VF>;
2335 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2336 avx512vl_i64_info, HasAVX512>,
2337 VEX_W, EVEX_CD8<64, CD8VF>;
2338 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2339 avx512vl_i64_info, HasAVX512>,
2340 VEX_W, EVEX_CD8<64, CD8VF>;
// FP vector compare (VCMPPS/VCMPPD) producing a mask register result.
// Variants: rri = reg/reg, rmi = reg/mem, rmbi = reg/broadcast-mem; the
// *_alt forms (asm-parser only, no patterns) accept a raw 8-bit immediate
// comparison code instead of the mnemonic CC suffix.
2343 multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
2344 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2345 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2346 "vcmp${cc}"#_.Suffix,
2347 "$src2, $src1", "$src1, $src2",
2348 (X86cmpm (_.VT _.RC:$src1),
2350 imm:$cc), itins.rr, 1>,
2351 Sched<[itins.Sched]>;
2353 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2354 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2355 "vcmp${cc}"#_.Suffix,
2356 "$src2, $src1", "$src1, $src2",
2357 (X86cmpm (_.VT _.RC:$src1),
2358 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2359 imm:$cc), itins.rm>,
2360 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2362 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2364 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2365 "vcmp${cc}"#_.Suffix,
2366 "${src2}"##_.BroadcastStr##", $src1",
2367 "$src1, ${src2}"##_.BroadcastStr,
2368 (X86cmpm (_.VT _.RC:$src1),
2369 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2370 imm:$cc), itins.rm>,
2371 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2372 // Accept explicit immediate argument form instead of comparison code.
2373 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2374 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2376 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2378 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2379 Sched<[itins.Sched]>;
2381 let mayLoad = 1 in {
2382 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2384 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2386 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2387 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2389 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2391 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2393 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2394 "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2395 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2399 // Patterns for selecting with loads in other operand.
// These commute a compare whose FIRST operand is the load (CommutableCMPCC
// restricts to CCs where swapping operands is legal) so the memory form
// can still be used; plain and write-masked ("k") variants for both the
// full-load and broadcast-load cases.
2400 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2401 CommutableCMPCC:$cc),
2402 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2405 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2407 CommutableCMPCC:$cc)),
2408 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2409 _.RC:$src1, addr:$src2,
2412 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2413 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2414 (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2417 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2418 (_.ScalarLdFrag addr:$src2)),
2420 CommutableCMPCC:$cc)),
2421 (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2422 _.RC:$src1, addr:$src2,
// SAE ({sae} = suppress-all-exceptions) form of the FP vector compare;
// uses X86cmpmRnd with FROUND_NO_EXC. Only defined for the reg-reg
// encoding (EVEX_B re-purposed as the SAE bit, so no memory form).
2426 multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
2427 // comparison code form (VCMP[EQ/LT/LE/...]
2428 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2429 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2430 "vcmp${cc}"#_.Suffix,
2431 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2432 (X86cmpmRnd (_.VT _.RC:$src1),
2435 (i32 FROUND_NO_EXC)), itins.rr>,
2436 EVEX_B, Sched<[itins.Sched]>;
// Asm-parser-only alternative taking the comparison code as a raw imm8.
2438 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2439 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2441 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2443 "$cc, {sae}, $src2, $src1",
2444 "$src1, $src2, {sae}, $cc", itins.rr>,
2445 EVEX_B, Sched<[itins.Sched]>;
// Instantiate FP vector compares at all widths. The SAE form exists only
// at 512 bits; 128/256-bit forms additionally require HasVLX.
2449 multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
2450 let Predicates = [HasAVX512] in {
2451 defm Z : avx512_vcmp_common<itins, _.info512>,
2452 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
2455 let Predicates = [HasAVX512,HasVLX] in {
2456 defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2457 defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
2461 defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
2462 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2463 defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
2464 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2467 // Patterns to select fp compares with load as first operand.
// Scalar (VCMPSS/VCMPSD) commuted-load patterns: the load appears as the
// first operand, so flip it into the memory form (legal only for the
// commutable comparison codes matched by CommutableCMPCC).
2468 let Predicates = [HasAVX512] in {
2469 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2470 CommutableCMPCC:$cc)),
2471 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2473 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2474 CommutableCMPCC:$cc)),
2475 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2478 // ----------------------------------------------------------------
2480 //handle fpclass instruction mask = op(reg_scalar,imm)
2481 // op(mem_scalar,imm)
// Scalar VFPCLASSSS/SD: classify one FP element against the imm8 category
// mask, producing a 1-bit mask. rr/rm = plain forms; rrk/rmk = write-masked
// forms where the incoming mask is OR'ed into the result.
2482 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2483 OpndItins itins, X86VectorVTInfo _,
2485 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2486 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2487 (ins _.RC:$src1, i32u8imm:$src2),
2488 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2489 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2490 (i32 imm:$src2)))], itins.rr>,
2491 Sched<[itins.Sched]>;
2492 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2493 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2494 OpcodeStr##_.Suffix#
2495 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2496 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2497 (OpNode (_.VT _.RC:$src1),
2498 (i32 imm:$src2))))], itins.rr>,
2499 EVEX_K, Sched<[itins.Sched]>;
2500 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2501 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2502 OpcodeStr##_.Suffix##
2503 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2505 (OpNode _.ScalarIntMemCPat:$src1,
2506 (i32 imm:$src2)))], itins.rm>,
2507 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2508 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2509 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2510 OpcodeStr##_.Suffix##
2511 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2512 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2513 (OpNode _.ScalarIntMemCPat:$src1,
2514 (i32 imm:$src2))))], itins.rm>,
2515 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2519 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2520 // fpclass(reg_vec, mem_vec, imm)
2521 // fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/PD: classify each FP element against the imm8 category
// mask, producing one mask bit per element. 'mem'/'broadcast' are mnemonic
// suffix strings used to disambiguate memory-size in assembly. Each form
// comes plain and write-masked ('k': incoming mask OR'ed into result);
// rmb/rmbk read a single scalar and broadcast it (EVEX_B).
2522 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2523 OpndItins itins, X86VectorVTInfo _,
2524 string mem, string broadcast>{
2525 let ExeDomain = _.ExeDomain in {
2526 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2527 (ins _.RC:$src1, i32u8imm:$src2),
2528 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2529 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2530 (i32 imm:$src2)))], itins.rr>,
2531 Sched<[itins.Sched]>;
2532 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2533 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2534 OpcodeStr##_.Suffix#
2535 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2536 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2537 (OpNode (_.VT _.RC:$src1),
2538 (i32 imm:$src2))))], itins.rr>,
2539 EVEX_K, Sched<[itins.Sched]>;
2540 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2541 (ins _.MemOp:$src1, i32u8imm:$src2),
2542 OpcodeStr##_.Suffix##mem#
2543 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2544 [(set _.KRC:$dst,(OpNode
2545 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2546 (i32 imm:$src2)))], itins.rm>,
2547 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2548 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2549 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2550 OpcodeStr##_.Suffix##mem#
2551 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2552 [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
2553 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2554 (i32 imm:$src2))))], itins.rm>,
2555 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2556 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2557 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2558 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2559 _.BroadcastStr##", $dst|$dst, ${src1}"
2560 ##_.BroadcastStr##", $src2}",
2561 [(set _.KRC:$dst,(OpNode
2562 (_.VT (X86VBroadcast
2563 (_.ScalarLdFrag addr:$src1))),
2564 (i32 imm:$src2)))], itins.rm>,
2565 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2566 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2567 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2568 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2569 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2570 _.BroadcastStr##", $src2}",
2571 [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
2572 (_.VT (X86VBroadcast
2573 (_.ScalarLdFrag addr:$src1))),
2574 (i32 imm:$src2))))], itins.rm>,
2575 EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiate vector fpclass at all widths; "{z}"/"{y}"/"{x}" are the
// memory-form mnemonic size suffixes for 512/256/128-bit operands.
// Narrow widths additionally require HasVLX.
2579 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2580 bits<8> opc, SDNode OpNode,
2581 OpndItins itins, Predicate prd,
2583 let Predicates = [prd] in {
2584 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2585 _.info512, "{z}", broadcast>, EVEX_V512;
2587 let Predicates = [prd, HasVLX] in {
2588 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2589 _.info128, "{x}", broadcast>, EVEX_V128;
2590 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2591 _.info256, "{y}", broadcast>, EVEX_V256;
2595 // FIXME: Is there a better scheduler itinerary for VFPCLASS?
// Ties together the packed (PS/PD, opcVec) and scalar (SS/SD, opcScalar)
// fpclass forms; "{l}"/"{q}" are the broadcast-form mnemonic suffixes.
2596 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2597 bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
2598 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2599 VecOpNode, SSE_ALU_F32P, prd, "{l}">,
2600 EVEX_CD8<32, CD8VF>;
2601 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2602 VecOpNode, SSE_ALU_F64P, prd, "{q}">,
2603 EVEX_CD8<64, CD8VF> , VEX_W;
2604 defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2605 SSE_ALU_F32S, f32x_info, prd>,
2606 EVEX_CD8<32, CD8VT1>;
2607 defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2608 SSE_ALU_F64S, f64x_info, prd>,
2609 EVEX_CD8<64, CD8VT1>, VEX_W;
// VFPCLASS requires the DQ extension (HasDQI).
2612 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2613 X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2615 //-----------------------------------------------------------------
2616 // Mask register copy, including
2617 // - copy between mask registers
2618 // - load/store mask registers
2619 // - copy from GPR to mask register and vice versa
// KMOV* mask<->mask and mask<->memory forms: kk = k-to-k copy (no pattern;
// handled as a pure move), km = load from memory, mk = store to memory.
2621 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2622 string OpcodeStr, RegisterClass KRC,
2623 ValueType vvt, X86MemOperand x86memop> {
2624 let hasSideEffects = 0, SchedRW = [WriteMove] in
2625 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2626 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2628 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2629 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2630 [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
2631 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2632 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2633 [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
// KMOV* mask<->GPR forms (no patterns; selected via COPY_TO_REGCLASS
// patterns elsewhere): kr = GPR-to-mask, rk = mask-to-GPR.
2636 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2638 RegisterClass KRC, RegisterClass GRC> {
2639 let hasSideEffects = 0 in {
2640 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2641 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2642 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2643 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2644 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2645 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
// KMOVB requires DQI; KMOVW is baseline AVX-512; KMOVD/KMOVQ require BWI.
2649 let Predicates = [HasDQI] in
2650 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2651 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2654 let Predicates = [HasAVX512] in
2655 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2656 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2659 let Predicates = [HasBWI] in {
2660 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2662 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2664 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2666 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2670 // GR from/to mask register
// i16/i8 values have no GR-sized KMOV on baseline AVX-512, so they are
// widened through a 32-bit GPR via INSERT_SUBREG / EXTRACT_SUBREG.
2671 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2672 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2673 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2674 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2676 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2677 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2678 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2679 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// KMOVWrk zero-extends into the 32-bit GPR, so zext folds away; anyext is
// just a register-class change.
2681 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2682 (KMOVWrk VK16:$src)>;
2683 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2684 (COPY_TO_REGCLASS VK16:$src, GR32)>;
// v8i1 zext: without DQI there is no KMOVB, so go through GR32 + MOVZX;
// with DQI use KMOVBrk directly.
2686 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2687 (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
2688 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2689 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2690 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2691 (COPY_TO_REGCLASS VK8:$src, GR32)>;
// 32/64-bit masks match GPR widths exactly; a register-class copy suffices.
2693 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2694 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2695 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2696 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2697 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2698 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2699 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2700 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Narrow-mask load/store: with DQI, KMOVB handles <=8-bit masks (narrower
// classes are first widened to VK8 via register-class copies).
2703 let Predicates = [HasDQI] in {
2704 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
2705 (KMOVBmk addr:$dst, VK8:$src)>;
2706 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2707 (KMOVBkm addr:$src)>;
2709 def : Pat<(store VK4:$src, addr:$dst),
2710 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
2711 def : Pat<(store VK2:$src, addr:$dst),
2712 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
2713 def : Pat<(store VK1:$src, addr:$dst),
2714 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2716 def : Pat<(v2i1 (load addr:$src)),
2717 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2718 def : Pat<(v4i1 (load addr:$src)),
2719 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Without DQI there is no KMOVB: stores go through a GPR and an 8-bit
// MOV; loads use MOVZX from memory then copy into the mask class.
2721 let Predicates = [HasAVX512, NoDQI] in {
2722 def : Pat<(store VK1:$src, addr:$dst),
2724 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
2726 def : Pat<(store VK2:$src, addr:$dst),
2728 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
2730 def : Pat<(store VK4:$src, addr:$dst),
2732 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
2734 def : Pat<(store VK8:$src, addr:$dst),
2736 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
2739 def : Pat<(v8i1 (load addr:$src)),
2740 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2741 def : Pat<(v2i1 (load addr:$src)),
2742 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
2743 def : Pat<(v4i1 (load addr:$src)),
2744 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
// 16-bit mask load/store via KMOVW (baseline AVX-512); the v1i1 load masks
// the loaded byte down to bit 0 with AND before moving into VK1.
2747 let Predicates = [HasAVX512] in {
2748 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
2749 (KMOVWmk addr:$dst, VK16:$src)>;
2750 def : Pat<(v1i1 (load addr:$src)),
2751 (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
2752 def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
2753 (KMOVWkm addr:$src)>;
// 32/64-bit mask load/store via KMOVD/KMOVQ (requires BWI).
2755 let Predicates = [HasBWI] in {
2756 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
2757 (KMOVDmk addr:$dst, VK32:$src)>;
2758 def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
2759 (KMOVDkm addr:$src)>;
2760 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
2761 (KMOVQmk addr:$dst, VK64:$src)>;
2762 def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
2763 (KMOVQkm addr:$src)>;
// GPR <-> mask scalar insert/extract lowering, instantiated for every mask
// width. scalar_to_vector / X86kextract at index 0 become plain
// register-class copies (GR8 sources widen through a 32-bit GPR first).
2766 let Predicates = [HasAVX512] in {
2767 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2768 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2769 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2771 def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
2772 (COPY_TO_REGCLASS maskRC:$src, GR32)>;
2774 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2775 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2778 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2779 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2780 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2781 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2782 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2783 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2784 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
// kshiftl-then-kshiftr by 15 isolates bit 0; lower it to AND with 1 in a
// GPR followed by KMOVWkr, copied into the destination mask class.
2786 def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2788 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2789 GR8:$src, sub_8bit), (i32 1))), VK1)>;
2790 def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2792 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2793 GR8:$src, sub_8bit), (i32 1))), VK16)>;
2794 def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2796 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2797 GR8:$src, sub_8bit), (i32 1))), VK8)>;
2801 // Mask unary operation
// Single reg-reg mask unary instruction (e.g. KNOT) with its pattern.
2803 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2804 RegisterClass KRC, SDPatternOperator OpNode,
2805 OpndItins itins, Predicate prd> {
2806 let Predicates = [prd] in
2807 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2808 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2809 [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
2810 Sched<[itins.Sched]>;
// Instantiate a mask unary op at all four widths; B requires DQI, W is
// baseline AVX-512, D/Q require BWI.
2813 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2814 SDPatternOperator OpNode, OpndItins itins> {
2815 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2816 itins, HasDQI>, VEX, PD;
2817 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2818 itins, HasAVX512>, VEX, PS;
2819 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2820 itins, HasBWI>, VEX, PD, VEX_W;
2821 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2822 itins, HasBWI>, VEX, PS, VEX_W;
2825 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
2827 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2828 let Predicates = [HasAVX512, NoDQI] in
2829 def : Pat<(vnot VK8:$src),
2830 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Sub-byte masks are likewise widened to VK16 and inverted with KNOTW.
2832 def : Pat<(vnot VK4:$src),
2833 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2834 def : Pat<(vnot VK2:$src),
2835 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2837 // Mask binary operation
2838 // - KAND, KANDN, KOR, KXNOR, KXOR
// Single reg-reg-reg mask binary instruction with its pattern;
// IsCommutable lets the register allocator swap the operands.
2839 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2840 RegisterClass KRC, SDPatternOperator OpNode,
2841 OpndItins itins, Predicate prd, bit IsCommutable> {
2842 let Predicates = [prd], isCommutable = IsCommutable in
2843 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2844 !strconcat(OpcodeStr,
2845 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2846 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2847 Sched<[itins.Sched]>;
// Instantiate a mask binary op at all four widths. The W form's predicate
// is overridable (prdW) because KADDW requires DQI, not baseline AVX-512.
2850 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2851 SDPatternOperator OpNode, OpndItins itins,
2852 bit IsCommutable, Predicate prdW = HasAVX512> {
2853 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2854 itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2855 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2856 itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2857 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2858 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2859 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2860 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// Scalar (i1-style) and vector forms of ANDN / XNOR used by the KANDN and
// KXNOR patterns below.
2863 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2864 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2865 // These nodes use 'vnot' instead of 'not' to support vectors.
2866 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2867 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2869 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
2870 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
2871 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
2872 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
2873 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
2874 defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1, HasDQI>;
// Lower mask binary ops on narrow mask classes (VK1/VK2/VK4, and VK8 when
// DQI is unavailable) by widening both operands to VK16, running the
// 16-bit instruction, and copying the result back to the narrow class.
2876 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2878 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2879 // for the DQI set, this type is legal and KxxxB instruction is used
2880 let Predicates = [NoDQI] in
2881 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2883 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2884 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2886 // All types smaller than 8 bits require conversion anyway
2887 def : Pat<(OpNode VK1:$src1, VK1:$src2),
2888 (COPY_TO_REGCLASS (Inst
2889 (COPY_TO_REGCLASS VK1:$src1, VK16),
2890 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// Copy the result back to the class matching the pattern's result type
// (VK2 for v2i1, VK4 for v4i1); previously these both copied to VK1,
// a copy-paste error from the VK1 pattern above.
2891 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2892 (COPY_TO_REGCLASS (Inst
2893 (COPY_TO_REGCLASS VK2:$src1, VK16),
2894 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
2895 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2896 (COPY_TO_REGCLASS (Inst
2897 (COPY_TO_REGCLASS VK4:$src1, VK16),
2898 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Narrow-mask lowering for each logic op, using the 16-bit (W) instruction.
2901 defm : avx512_binop_pat<and, and, KANDWrr>;
2902 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2903 defm : avx512_binop_pat<or, or, KORWrr>;
2904 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2905 defm : avx512_binop_pat<xor, xor, KXORWrr>;
// KUNPCK: concatenate two half-width masks into one. Note the pattern
// places $src2 first: the second concat operand becomes the high half.
2908 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
2909 RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
2910 let Predicates = [prd] in {
2911 let hasSideEffects = 0 in
2912 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
2913 (ins KRC:$src1, KRC:$src2),
2914 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2915 itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
2917 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
2918 (!cast<Instruction>(NAME##rr)
2919 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
2920 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
2924 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
2925 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
2926 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
// KORTEST/KTEST: compare two masks and set EFLAGS (no register result).
2929 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2930 SDNode OpNode, OpndItins itins, Predicate prd> {
2931 let Predicates = [prd], Defs = [EFLAGS] in
2932 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2933 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2934 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2935 Sched<[itins.Sched]>;
// All-width instantiation; W's predicate is overridable since KTESTW
// requires DQI rather than baseline AVX-512.
2938 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2939 OpndItins itins, Predicate prdW = HasAVX512> {
2940 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
2942 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
2944 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
2946 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
2950 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
2951 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
// KSHIFTL/KSHIFTR: shift a mask register by an 8-bit immediate.
2954 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2955 SDNode OpNode, OpndItins itins> {
2956 let Predicates = [HasAVX512] in
2957 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2958 !strconcat(OpcodeStr,
2959 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2960 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
2961 itins.rr>, Sched<[itins.Sched]>;
// opc1 encodes the B/W forms, opc2 the D/Q forms; B requires DQI,
// D/Q require BWI.
2964 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2965 SDNode OpNode, OpndItins itins> {
2966 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2967 itins>, VEX, TAPD, VEX_W;
2968 let Predicates = [HasDQI] in
2969 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2971 let Predicates = [HasBWI] in {
2972 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2973 itins>, VEX, TAPD, VEX_W;
2974 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2979 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
2980 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
// Without VLX, 256-bit compares are widened: insert the YMM operands into
// the low half of a 512-bit register, run the 512-bit compare, and copy
// the resulting VK16 down to VK8. Upper-lane garbage is harmless because
// only the low 8 mask bits are consumed.
2982 multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
2983 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
2984 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
2985 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2986 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
2988 def : Pat<(v8i1 (and VK8:$mask,
2989 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
2991 (!cast<Instruction>(InstStr##Zrrk)
2992 (COPY_TO_REGCLASS VK8:$mask, VK16),
2993 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2994 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
// Same widening trick for the compare-with-CC (immediate) forms.
2998 multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
2999 AVX512VLVectorVTInfo _> {
3000 def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
3001 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
3002 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3003 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3006 def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1),
3007 (_.info256.VT VR256X:$src2), imm:$cc))),
3008 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
3009 (COPY_TO_REGCLASS VK8:$mask, VK16),
3010 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3011 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3015 let Predicates = [HasAVX512, NoVLX] in {
3016 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
3017 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
3019 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
3020 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
3021 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
3024 // Mask setting all 0s or 1s
// Pseudo that materializes an all-zeros / all-ones mask; rematerializable
// so the register allocator can re-create it instead of spilling.
3025 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3026 let Predicates = [HasAVX512] in
3027 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3028 SchedRW = [WriteZero] in
3029 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3030 [(set KRC:$dst, (VT Val))]>;
3033 multiclass avx512_mask_setop_w<PatFrag Val> {
3034 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3035 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3036 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3039 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3040 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3042 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3043 let Predicates = [HasAVX512] in {
3044 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3045 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3046 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3047 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3048 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3049 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3050 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3051 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3054 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 these are pure register-class changes — no instruction is
// needed. Instantiated for every (narrow, wide) mask-class pair below.
3055 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3056 RegisterClass RC, ValueType VT> {
3057 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3058 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3060 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3061 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
3063 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3064 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3065 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3066 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3067 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3068 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3070 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3071 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3072 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3073 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3074 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3076 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3077 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3078 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3079 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3081 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3082 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3083 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3085 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3086 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3088 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
// Extract a narrow mask at a non-zero index from a wide mask by
// right-shifting the wide mask (KSHIFTR at the source's native width),
// then copying into the destination class.
3091 multiclass vextract_for_mask_to_mask<string InstrStr, X86KVectorVTInfo From,
3092 X86KVectorVTInfo To, Predicate prd> {
3093 let Predicates = [prd] in
3095 Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
3096 (To.KVT(COPY_TO_REGCLASS
3097 (!cast<Instruction>(InstrStr#"ri") From.KVT:$src,
3098 (i8 imm:$imm8)), To.KRC))>;
// Sub-16-bit source masks have no native KSHIFT; widen to VK16 and use
// KSHIFTRW (always legal under AVX-512).
3101 multiclass vextract_for_mask_to_mask_legal_w<X86KVectorVTInfo From,
3102 X86KVectorVTInfo To> {
3104 Pat<(To.KVT(extract_subvector(From.KVT From.KRC:$src), (iPTR imm:$imm8))),
3105 (To.KVT(COPY_TO_REGCLASS
3106 (KSHIFTRWri(COPY_TO_REGCLASS From.KRC:$src, VK16),
3107 (i8 imm:$imm8)), To.KRC))>;
3110 defm : vextract_for_mask_to_mask_legal_w<v2i1_info, v1i1_info>;
3111 defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v1i1_info>;
3112 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v1i1_info>;
3113 defm : vextract_for_mask_to_mask_legal_w<v4i1_info, v2i1_info>;
3114 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v2i1_info>;
3115 defm : vextract_for_mask_to_mask_legal_w<v8i1_info, v4i1_info>;
3117 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v1i1_info, HasAVX512>;
3118 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v1i1_info, HasBWI>;
3119 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v1i1_info, HasBWI>;
3120 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v2i1_info, HasAVX512>;
3121 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v2i1_info, HasBWI>;
3122 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v2i1_info, HasBWI>;
3123 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v4i1_info, HasAVX512>;
3124 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v4i1_info, HasBWI>;
3125 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v4i1_info, HasBWI>;
3126 defm : vextract_for_mask_to_mask<"KSHIFTRW", v16i1_info, v8i1_info, HasAVX512>;
3127 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v8i1_info, HasBWI>;
3128 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v8i1_info, HasBWI>;
3129 defm : vextract_for_mask_to_mask<"KSHIFTRD", v32i1_info, v16i1_info, HasBWI>;
3130 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v16i1_info, HasBWI>;
3131 defm : vextract_for_mask_to_mask<"KSHIFTRQ", v64i1_info, v32i1_info, HasBWI>;
3133 // Patterns for kmask shift
// Lower X86kshiftl/X86kshiftr on mask classes narrower than 16 bits by
// copying through VK16 and using the word-sized KSHIFTLW/KSHIFTRW.
// NOTE(review): the lines copying the result back to RC (and the closing
// braces) are missing from this extracted chunk.
3134 multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
3135 def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
3136 (VT (COPY_TO_REGCLASS
3137 (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
3140 def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
3141 (VT (COPY_TO_REGCLASS
3142 (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
// v8i1 only needs this lowering without DQI (DQI provides KSHIFT*B);
// v4i1/v2i1 always need it.
3147 defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
3148 defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
3149 defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
3150 //===----------------------------------------------------------------------===//
3151 // AVX-512 - Aligned and unaligned load and store
// Base multiclass for an AVX-512 vector load/move opcode. Emits the full set
// of forms: rr (plain reg move), rrk/rrkz (merge-/zero-masked reg move),
// rm (plain load, pattern optional via NoRMPattern), rmk/rmkz (masked loads),
// plus patterns mapping masked_load SDNodes onto the masked memory forms.
// SelectOprr lets callers use a select operator other than vselect for the
// register forms.
3155 multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3156 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3157 bit NoRMPattern = 0,
3158 SDPatternOperator SelectOprr = vselect> {
3159 let hasSideEffects = 0 in {
// rr: unmasked register-to-register move; no pattern (handled elsewhere).
3160 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3161 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3162 _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;
// rrkz: zero-masking register move (EVEX {z}); masked-off lanes get zero.
3163 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3164 (ins _.KRCWM:$mask, _.RC:$src),
3165 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3166 "${dst} {${mask}} {z}, $src}"),
3167 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3169 _.ImmAllZerosV)))], _.ExeDomain,
3170 itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>;
// rm: unmasked load; rematerializable and foldable as a load.
3172 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3173 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3174 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3175 !if(NoRMPattern, [],
3177 (_.VT (bitconvert (ld_frag addr:$src))))]),
3178 _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;
// Merge-masking forms tie $src0 to $dst (masked-off lanes keep $src0).
3180 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3181 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3182 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3183 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3184 "${dst} {${mask}}, $src1}"),
3185 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3187 (_.VT _.RC:$src0))))], _.ExeDomain,
3188 itins.rr>, EVEX, EVEX_K, Sched<[WriteMove]>;
3189 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3190 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3191 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3192 "${dst} {${mask}}, $src1}"),
3193 [(set _.RC:$dst, (_.VT
3194 (vselect _.KRCWM:$mask,
3195 (_.VT (bitconvert (ld_frag addr:$src1))),
3196 (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>,
3197 EVEX, EVEX_K, Sched<[WriteLoad]>;
// rmkz: zero-masking load.
3199 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3200 (ins _.KRCWM:$mask, _.MemOp:$src),
3201 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3202 "${dst} {${mask}} {z}, $src}",
3203 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3204 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3205 _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
// Map the masked-load SDNode onto the masked instructions: undef and
// all-zeros passthru both select the zero-masking form.
3207 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3208 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3210 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3211 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3213 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3214 (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
3215 _.KRCWM:$mask, addr:$ptr)>;
// Instantiate avx512_load for the three vector lengths (Z/Z256/Z128).
// Aligned variant: uses AlignedLdFrag and the aligned masked-load fragments;
// 256/128-bit forms additionally require VLX.
3218 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3219 AVX512VLVectorVTInfo _,
3221 let Predicates = [prd] in
3222 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
3223 _.info512.AlignedLdFrag, masked_load_aligned512>,
3226 let Predicates = [prd, HasVLX] in {
3227 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
3228 _.info256.AlignedLdFrag, masked_load_aligned256>,
3230 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
3231 _.info128.AlignedLdFrag, masked_load_aligned128>,
// Unaligned variant: plain LdFrag + masked_load_unaligned, forwarding
// NoRMPattern/SelectOprr to the base multiclass.
3236 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3237 AVX512VLVectorVTInfo _,
3239 bit NoRMPattern = 0,
3240 SDPatternOperator SelectOprr = vselect> {
3241 let Predicates = [prd] in
3242 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512, _.info512.LdFrag,
3243 masked_load_unaligned, NoRMPattern,
3244 SelectOprr>, EVEX_V512;
3246 let Predicates = [prd, HasVLX] in {
3247 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256, _.info256.LdFrag,
3248 masked_load_unaligned, NoRMPattern,
3249 SelectOprr>, EVEX_V256;
3250 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128, _.info128.LdFrag,
3251 masked_load_unaligned, NoRMPattern,
3252 SelectOprr>, EVEX_V128;
// Base multiclass for an AVX-512 vector store opcode. The rr_REV/rrk_REV/
// rrkz_REV forms are the MRMDestReg (store-direction) encodings of the
// register move, emitted only with the ".s" mnemonic suffix and tagged with
// FoldGenData so the unfolder can map them back to the load-direction forms.
// mr/mrk are the actual memory stores; a trailing pattern maps the
// masked-store SDNode onto mrk.
3256 multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3257 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3258 string Name, bit NoMRPattern = 0> {
3259 let hasSideEffects = 0 in {
3260 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3261 OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
3262 [], _.ExeDomain, itins.rr>, EVEX, FoldGenData<Name#rr>,
3264 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3265 (ins _.KRCWM:$mask, _.RC:$src),
3266 OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
3267 "${dst} {${mask}}, $src}",
3268 [], _.ExeDomain, itins.rr>, EVEX, EVEX_K,
3269 FoldGenData<Name#rrk>, Sched<[WriteMove]>;
3270 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3271 (ins _.KRCWM:$mask, _.RC:$src),
3272 OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
3273 "${dst} {${mask}} {z}, $src}",
3274 [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ,
3275 FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
// mr: unmasked store (pattern optional via NoMRPattern); mrk: masked store,
// selected only through the mstore pattern below.
3278 let hasSideEffects = 0, mayStore = 1 in
3279 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3280 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3281 !if(NoMRPattern, [],
3282 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3283 _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;
3284 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3285 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3286 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3287 [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>;
3289 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3290 (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
3291 _.KRCWM:$mask, _.RC:$src)>;
// Instantiate avx512_store for the three vector lengths (Z/Z256/Z128);
// unaligned variant uses plain `store` + masked_store_unaligned, the aligned
// variant uses `alignedstore` + per-width aligned masked-store fragments.
// 256/128-bit forms additionally require VLX.
3295 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3296 AVX512VLVectorVTInfo _, Predicate prd,
3297 string Name, bit NoMRPattern = 0> {
3298 let Predicates = [prd] in
3299 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
3300 masked_store_unaligned, Name#Z, NoMRPattern>, EVEX_V512;
3302 let Predicates = [prd, HasVLX] in {
3303 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
3304 masked_store_unaligned, Name#Z256,
3305 NoMRPattern>, EVEX_V256;
3306 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
3307 masked_store_unaligned, Name#Z128,
3308 NoMRPattern>, EVEX_V128;
3312 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3313 AVX512VLVectorVTInfo _, Predicate prd,
3315 let Predicates = [prd] in
3316 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
3317 masked_store_aligned512, Name#Z>, EVEX_V512;
3319 let Predicates = [prd, HasVLX] in {
3320 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
3321 masked_store_aligned256, Name#Z256>, EVEX_V256;
3322 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
3323 masked_store_aligned128, Name#Z128>, EVEX_V128;
// Concrete AVX-512 move instructions: aligned/unaligned FP (VMOVAPS/PD,
// VMOVUPS/PD) and integer (VMOVDQA32/64, VMOVDQU8/16/32/64) loads and stores.
// Byte/word element forms require BWI; all others only AVX512F. The byte/word
// forms pass NoRMPattern/NoMRPattern=1, so their plain load/store patterns
// are suppressed (presumably selected via the 32-bit-element forms instead --
// see the i8/i16 store patterns later in this file).
3327 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3329 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3330 HasAVX512, "VMOVAPS">,
3331 PS, EVEX_CD8<32, CD8VF>;
3333 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3335 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3336 HasAVX512, "VMOVAPD">,
3337 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3339 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3341 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3343 PS, EVEX_CD8<32, CD8VF>;
3345 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3347 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3349 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3351 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3353 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3354 HasAVX512, "VMOVDQA32">,
3355 PD, EVEX_CD8<32, CD8VF>;
3357 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3359 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3360 HasAVX512, "VMOVDQA64">,
3361 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3363 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
3364 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
3365 HasBWI, "VMOVDQU8", 1>,
3366 XD, EVEX_CD8<8, CD8VF>;
3368 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
3369 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
3370 HasBWI, "VMOVDQU16", 1>,
3371 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3373 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3375 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
3376 HasAVX512, "VMOVDQU32">,
3377 XS, EVEX_CD8<32, CD8VF>;
3379 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3381 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
3382 HasAVX512, "VMOVDQU64">,
3383 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3385 // Special instructions to help with spilling when we don't have VLX. We need
3386 // to load or store from a ZMM register instead. These are converted in
3387 // expandPostRAPseudos.
// Load-direction pseudos: no encoding (opcode 0, Pseudo), no asm string;
// rematerializable/foldable like real vector loads.
3388 let isReMaterializable = 1, canFoldAsLoad = 1,
3389 isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
3390 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3391 "", [], IIC_SSE_MOVA_P_RM>;
3392 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3393 "", [], IIC_SSE_MOVA_P_RM>;
3394 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3395 "", [], IIC_SSE_MOVA_P_RM>;
3396 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3397 "", [], IIC_SSE_MOVA_P_RM>;
// Store-direction counterparts.
3400 let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
3401 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3402 "", [], IIC_SSE_MOVA_P_MR>;
3403 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3404 "", [], IIC_SSE_MOVA_P_MR>;
3405 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3406 "", [], IIC_SSE_MOVA_P_MR>;
3407 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3408 "", [], IIC_SSE_MOVA_P_MR>;
// Select-with-zero patterns: (vselect mask, 0, src) is implemented as a
// zero-masked move of src under the INVERTED mask (KNOTWrr), since the
// zero-masking form zeroes the lanes where the mask is clear.
3411 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3412 (v8i64 VR512:$src))),
3413 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3416 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3417 (v16i32 VR512:$src))),
3418 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3420 // These patterns exist to prevent the above patterns from introducing a second
3421 // mask inversion when one already exists.
// If the mask is already (xor mask, all-ones), fold the inversion into the
// zero-masked move directly instead of emitting another KNOT.
3422 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3423 (bc_v8i64 (v16i32 immAllZerosV)),
3424 (v8i64 VR512:$src))),
3425 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3426 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3427 (v16i32 immAllZerosV),
3428 (v16i32 VR512:$src))),
3429 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3431 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3432 // available. Use a 512-bit operation and extract.
// The 256-bit operands are widened into undef 512-bit registers via
// INSERT_SUBREG, the select is done as a masked 512-bit move with the v8i1
// mask copied up to VK16WM, and (per the lines missing from this chunk,
// presumably an EXTRACT_SUBREG) the low 256 bits are taken as the result.
3433 let Predicates = [HasAVX512, NoVLX] in {
3434 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
3435 (v8f32 VR256X:$src0))),
3439 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
3440 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
3441 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
3444 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
3445 (v8i32 VR256X:$src0))),
3449 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
3450 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
3451 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
// Stores of byte/word-element vectors are emitted with the 32-bit-element
// opcodes (VMOVDQA32/VMOVDQU32) -- the VMOVDQU8/16 patterns were suppressed
// above (NoMRPattern), presumably because those instructions require BWI.
3455 let Predicates = [HasAVX512] in {
3457 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3458 (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3459 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3460 (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3461 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3462 (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
3463 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3464 (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
// Same for the 128-/256-bit EVEX forms, which additionally require VLX.
3467 let Predicates = [HasVLX] in {
3469 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3470 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3471 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3472 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3473 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3474 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3475 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3476 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3479 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3480 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3481 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3482 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3483 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3484 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3485 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3486 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
// A masked select of (extract_subvector src, 0), viewed through a bitcast as
// the Cast type, becomes a merge-masked (rrk) or zero-masked (rrkz) register
// move of the low subregister -- no shuffle needed since index 0 extracts
// the low lanes.
3489 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3490 X86VectorVTInfo To, X86VectorVTInfo Cast> {
3491 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3493 (To.VT (extract_subvector
3494 (From.VT From.RC:$src), (iPTR 0)))),
3496 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3497 Cast.RC:$src0, Cast.KRCWM:$mask,
3498 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
// Zero-passthru variant selects the zero-masking move.
3500 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3502 (To.VT (extract_subvector
3503 (From.VT From.RC:$src), (iPTR 0)))),
3504 Cast.ImmAllZerosV)),
3505 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3507 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
3511 let Predicates = [HasVLX] in {
3512 // A masked extract from the first 128-bits of a 256-bit vector can be
3513 // implemented with masked move.
// Each element type is instantiated with both a 64-bit and a 32-bit element
// move so either mask granularity can be matched via the Cast type.
3514 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
3515 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
3516 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3517 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
3518 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
3519 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
3520 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3521 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
3522 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
3523 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
3524 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
3525 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
3527 // A masked extract from the first 128-bits of a 512-bit vector can be
3528 // implemented with masked move.
3529 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
3530 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3531 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3532 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
3533 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
3534 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3535 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3536 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
3537 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
3538 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
3539 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
3540 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
3542 // A masked extract from the first 256-bits of a 512-bit vector can be
3543 // implemented with masked move.
3544 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
3545 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
3546 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3547 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
3548 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
3549 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
3550 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3551 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
3552 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
3553 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
3554 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
3555 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
3558 // Move Int Doubleword to Packed Double Int
// GPR <-> XMM moves: vmovd (GR32 -> low dword of v4i32) and vmovq
// (GR64 -> low qword of v2i64), with register and memory source forms,
// plus isCodeGenOnly FR64X bitcast variants used for scalar f64 <-> i64
// bit-pattern moves.
3560 let ExeDomain = SSEPackedInt in {
3561 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3562 "vmovd\t{$src, $dst|$dst, $src}",
3564 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
3565 EVEX, Sched<[WriteMove]>;
3566 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3567 "vmovd\t{$src, $dst|$dst, $src}",
3569 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
3570 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3571 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3572 "vmovq\t{$src, $dst|$dst, $src}",
3574 (v2i64 (scalar_to_vector GR64:$src)))],
3575 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// Memory form exists for the disassembler only (no pattern).
3576 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3577 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3579 "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3580 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
// Bitcast moves between GR64 and FR64X (and i64 memory), codegen-only.
3581 let isCodeGenOnly = 1 in {
3582 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3583 "vmovq\t{$src, $dst|$dst, $src}",
3584 [(set FR64X:$dst, (bitconvert GR64:$src))],
3585 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3586 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3587 "vmovq\t{$src, $dst|$dst, $src}",
3588 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3589 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3590 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3591 "vmovq\t{$src, $dst|$dst, $src}",
3592 [(set GR64:$dst, (bitconvert FR64X:$src))],
3593 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3594 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3595 "vmovq\t{$src, $dst|$dst, $src}",
3596 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
3597 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
3598 EVEX_CD8<64, CD8VT1>;
3600 } // ExeDomain = SSEPackedInt
3602 // Move Int Doubleword to Single Scalar
// Codegen-only vmovd forms that bitcast a GR32 (or 32-bit load) into FR32X.
3604 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3605 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3606 "vmovd\t{$src, $dst|$dst, $src}",
3607 [(set FR32X:$dst, (bitconvert GR32:$src))],
3608 IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>;
3610 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3611 "vmovd\t{$src, $dst|$dst, $src}",
3612 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
3613 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3614 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3616 // Move doubleword from xmm register to r/m32
// Store-direction vmovd: extract element 0 of v4i32 into a GR32 or memory.
3618 let ExeDomain = SSEPackedInt in {
3619 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3620 "vmovd\t{$src, $dst|$dst, $src}",
3621 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3622 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
3623 EVEX, Sched<[WriteMove]>;
3624 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3625 (ins i32mem:$dst, VR128X:$src),
3626 "vmovd\t{$src, $dst|$dst, $src}",
3627 [(store (i32 (extractelt (v4i32 VR128X:$src),
3628 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
3629 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3630 } // ExeDomain = SSEPackedInt
3632 // Move quadword from xmm1 register to r/m64
// Store-direction vmovq forms: extract element 0 of v2i64 into GR64 or
// memory. The 0x7E GPR forms are 64-bit-mode only (REX.W semantics).
3634 let ExeDomain = SSEPackedInt in {
3635 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3636 "vmovq\t{$src, $dst|$dst, $src}",
3637 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3639 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
3640 Requires<[HasAVX512, In64BitMode]>;
// Memory form of the 0x7E encoding exists for the disassembler only.
3642 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3643 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3644 "vmovq\t{$src, $dst|$dst, $src}",
3645 [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>,
3646 Requires<[HasAVX512, In64BitMode]>;
// 0xD6 encoding is the one actually used for the qword store pattern.
3648 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3649 (ins i64mem:$dst, VR128X:$src),
3650 "vmovq\t{$src, $dst|$dst, $src}",
3651 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3652 addr:$dst)], IIC_SSE_MOVDQ>,
3653 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3654 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
// Store-direction register form, disambiguated with the ".s" mnemonic.
3656 let hasSideEffects = 0 in
3657 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3659 "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3660 EVEX, VEX_W, Sched<[WriteMove]>;
3661 } // ExeDomain = SSEPackedInt
3663 // Move Scalar Single to Double Int
// Codegen-only vmovd forms bitcasting FR32X to GR32 / 32-bit memory.
3665 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3666 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3668 "vmovd\t{$src, $dst|$dst, $src}",
3669 [(set GR32:$dst, (bitconvert FR32X:$src))],
3670 IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>;
3671 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3672 (ins i32mem:$dst, FR32X:$src),
3673 "vmovd\t{$src, $dst|$dst, $src}",
3674 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
3675 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3676 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3678 // Move Quadword Int to Packed Quadword Int
// vmovq load: 64-bit load zero-extended into the low qword of v2i64.
3680 let ExeDomain = SSEPackedInt in {
3681 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3683 "vmovq\t{$src, $dst|$dst, $src}",
3685 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3686 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3687 } // ExeDomain = SSEPackedInt
3689 //===----------------------------------------------------------------------===//
3690 // AVX-512 MOVSS, MOVSD
3691 //===----------------------------------------------------------------------===//
// Base multiclass for VMOVSS/VMOVSD. Register forms select via OpNode
// (X86Movss/X86Movsd) with merge- (rrk) and zero-masked (rrkz) variants
// driven by X86selects on a VK1WM mask. The memory forms operate on the
// scalar FRC class: rm is a plain rematerializable scalar load, rmk/rmkz
// are masked loads with no patterns (selected by the lowering patterns later
// in this file), and mr/mrk are the scalar stores.
3693 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3694 X86VectorVTInfo _> {
3695 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3696 (ins _.RC:$src1, _.RC:$src2),
3697 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3698 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3699 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>;
3700 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3701 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3702 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3703 "$dst {${mask}} {z}, $src1, $src2}"),
3704 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3705 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3707 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
3708 let Constraints = "$src0 = $dst" in
3709 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3710 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3711 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3712 "$dst {${mask}}, $src1, $src2}"),
3713 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3714 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3715 (_.VT _.RC:$src0))))],
3716 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>;
3717 let canFoldAsLoad = 1, isReMaterializable = 1 in
3718 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3719 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3720 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3721 _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>;
3722 let mayLoad = 1, hasSideEffects = 0 in {
3723 let Constraints = "$src0 = $dst" in
3724 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3725 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3726 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3727 "$dst {${mask}}, $src}"),
3728 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>;
3729 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3730 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3731 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3732 "$dst {${mask}} {z}, $src}"),
3733 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
3735 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3736 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3737 [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
3738 EVEX, Sched<[WriteStore]>;
3739 let mayStore = 1, hasSideEffects = 0 in
3740 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3741 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3742 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3743 [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>;
// Concrete VMOVSS (f32) and VMOVSD (f64) instruction families.
3746 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3747 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3749 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3750 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Fold (Movss/Movsd dst, (select (bit0 of GR32 mask), src1, src2-or-zero))
// into the masked rrk/rrkz register forms, copying the GR32 mask into VK1WM
// and the FRC scalar operands into the full vector class.
3753 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3754 PatLeaf ZeroFP, X86VectorVTInfo _> {
3756 def : Pat<(_.VT (OpNode _.RC:$src0,
3757 (_.VT (scalar_to_vector
3758 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3759 (_.EltVT _.FRC:$src1),
3760 (_.EltVT _.FRC:$src2))))))),
3761 (!cast<Instruction>(InstrStr#rrk)
3762 (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
3763 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3765 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
// Zero false-operand variant selects the zero-masking form.
3767 def : Pat<(_.VT (OpNode _.RC:$src0,
3768 (_.VT (scalar_to_vector
3769 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3770 (_.EltVT _.FRC:$src1),
3771 (_.EltVT ZeroFP))))))),
3772 (!cast<Instruction>(InstrStr#rrkz)
3773 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3775 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
// Lower a masked_store of a 128-bit vector (widened through 256 to 512 bits
// via insert_subvector) with a single-lane mask into the scalar masked store
// instruction (mrk), moving the GPR mask into VK1WM.
3778 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3779 dag Mask, RegisterClass MaskRC> {
3781 def : Pat<(masked_store addr:$dst, Mask,
3782 (_.info512.VT (insert_subvector undef,
3783 (_.info256.VT (insert_subvector undef,
3784 (_.info128.VT _.info128.RC:$src),
3787 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3788 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3789 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Variant for sub-32-bit mask GPRs (GR8/GR16): widen the mask into an i32
// via INSERT_SUBREG before copying it to VK1WM.
3793 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3794 AVX512VLVectorVTInfo _,
3795 dag Mask, RegisterClass MaskRC,
3796 SubRegIndex subreg> {
3798 def : Pat<(masked_store addr:$dst, Mask,
3799 (_.info512.VT (insert_subvector undef,
3800 (_.info256.VT (insert_subvector undef,
3801 (_.info128.VT _.info128.RC:$src),
3804 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3805 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3806 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Corresponding masked_load lowerings: an all-zeros passthru selects the
// zero-masked load (rmkz); a zero-extended-register passthru (X86vzmovl of
// $src widened to 512 bits) selects the merge-masked load (rmk).
3810 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3811 dag Mask, RegisterClass MaskRC> {
3813 def : Pat<(_.info128.VT (extract_subvector
3814 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3815 (_.info512.VT (bitconvert
3816 (v16i32 immAllZerosV))))),
3818 (!cast<Instruction>(InstrStr#rmkz)
3819 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3822 def : Pat<(_.info128.VT (extract_subvector
3823 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3824 (_.info512.VT (insert_subvector undef,
3825 (_.info256.VT (insert_subvector undef,
3826 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3830 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3831 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Sub-32-bit-mask variant of the load lowerings, mirroring the store one.
3836 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3837 AVX512VLVectorVTInfo _,
3838 dag Mask, RegisterClass MaskRC,
3839 SubRegIndex subreg> {
3841 def : Pat<(_.info128.VT (extract_subvector
3842 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3843 (_.info512.VT (bitconvert
3844 (v16i32 immAllZerosV))))),
3846 (!cast<Instruction>(InstrStr#rmkz)
3847 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3850 def : Pat<(_.info128.VT (extract_subvector
3851 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3852 (_.info512.VT (insert_subvector undef,
3853 (_.info256.VT (insert_subvector undef,
3854 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3858 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3859 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar select/store/load lowerings for VMOVSS (f32) and
// VMOVSD (f64), with mask sources in GR32 (truncated), GR16, and GR8.
3864 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3865 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
3867 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3868 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3869 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3870 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3871 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3872 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3874 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3875 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3876 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3877 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3878 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3879 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// Select between two scalar FP registers under a mask using the merge-masked
// register-to-register scalar moves (VMOVSSZrrk / VMOVSDZrrk).  FR32X/FR64X
// operands are shuttled through VR128X because those instructions operate on
// 128-bit registers; a GR8 mask is widened to i32 and copied into VK1WM.
3881 def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3882 (f32 FR32X:$src1), (f32 FR32X:$src2))),
3884 (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3885 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3886 GR8:$mask, sub_8bit)), VK1WM),
3887 (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
// Same selection when the mask is already in a VK1WM register.
3890 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
3891 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3892 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
3893 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
// f64 variants of the two patterns above, using VMOVSDZrrk.
3895 def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3896 (f64 FR64X:$src1), (f64 FR64X:$src2))),
3898 (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3899 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3900 GR8:$mask, sub_8bit)), VK1WM),
3901 (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
3904 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
3905 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3906 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
3907 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
// Lower the mask_store_ss intrinsic directly to a masked scalar store.
3909 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3910 (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
3911 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
// Reversed-operand (MRMDestReg) register-register encodings of VMOVSS /
// VMOVSD, including masked {k} and zero-masked {k}{z} forms.  They carry no
// ISel patterns ([]), use the ".s" mnemonic suffix to select this encoding
// in assembly, and are tied to the forward forms via FoldGenData for the
// memory-folding tables.
3913 let hasSideEffects = 0 in {
3914 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3915 (ins VR128X:$src1, VR128X:$src2),
3916 "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3917 [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG,
3918 FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;
// Merge-masked form: $src0 is the pass-through, tied to $dst.
3920 let Constraints = "$src0 = $dst" in
3921 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3922 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
3923 VR128X:$src1, VR128X:$src2),
3924 "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
3925 "$dst {${mask}}, $src1, $src2}",
3926 [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG,
3927 FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
// Zero-masked form.
3929 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3930 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
3931 "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3932 "$dst {${mask}} {z}, $src1, $src2}",
3933 [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
3934 FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;
// f64 variants (XD prefix, VEX_W).
3936 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3937 (ins VR128X:$src1, VR128X:$src2),
3938 "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3939 [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W,
3940 FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;
3942 let Constraints = "$src0 = $dst" in
3943 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3944 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
3945 VR128X:$src1, VR128X:$src2),
3946 "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
3947 "$dst {${mask}}, $src1, $src2}",
3948 [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG,
3949 VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
3951 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3952 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
3954 "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3955 "$dst {${mask}} {z}, $src1, $src2}",
3956 [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
3957 VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
// ISel patterns selecting VMOVSS/VMOVSD (EVEX forms) for zero-extending
// moves (X86vzmovl), scalar loads, zero-extending loads (X86vzload), and
// the Movss/Movsd/Movlpd/Movlps shuffle nodes.  AddedComplexity gives these
// priority over more generic patterns.
3960 let Predicates = [HasAVX512] in {
3961 let AddedComplexity = 15 in {
// Zero the upper elements by blending against a zeroed XMM register.
3962 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
3963 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
3964 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
3965 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
3966 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
3967 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
// NOTE(review): this COPY_TO_REGCLASS targets VR128, while every other
// pattern in this block uses VR128X -- confirm this is intentional.
3968 (COPY_TO_REGCLASS FR64X:$src, VR128))>;
3971 // Move low f32 and clear high bits.
3972 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3973 (SUBREG_TO_REG (i32 0),
3974 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3975 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3976 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3977 (SUBREG_TO_REG (i32 0),
3978 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3979 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
3980 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3981 (SUBREG_TO_REG (i32 0),
3982 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3983 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
3984 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
3985 (SUBREG_TO_REG (i32 0),
3986 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3987 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
3989 let AddedComplexity = 20 in {
3990 // MOVSSrm zeros the high parts of the register; represent this
3991 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3992 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3993 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3994 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3995 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3996 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3997 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3998 def : Pat<(v4f32 (X86vzload addr:$src)),
3999 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4001 // MOVSDrm zeros the high parts of the register; represent this
4002 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
4003 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4004 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4005 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4006 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4007 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4008 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4009 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4010 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4011 def : Pat<(v2f64 (X86vzload addr:$src)),
4012 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4014 // Represent the same patterns above but in the form they appear for
// 256-bit types: 128-bit scalar load zero-extended into a YMM register.
4016 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4017 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4018 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4019 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4020 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4021 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4022 def : Pat<(v8f32 (X86vzload addr:$src)),
4023 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4024 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4025 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4026 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4027 def : Pat<(v4f64 (X86vzload addr:$src)),
4028 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4030 // Represent the same patterns above but in the form they appear for
// 512-bit types: 128-bit scalar load zero-extended into a ZMM register.
4032 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4033 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4034 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4035 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4036 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4037 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4038 def : Pat<(v16f32 (X86vzload addr:$src)),
4039 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4040 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4041 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4042 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4043 def : Pat<(v8f64 (X86vzload addr:$src)),
4044 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4046 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4047 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4048 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4050 // Move low f64 and clear high bits.
4051 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4052 (SUBREG_TO_REG (i32 0),
4053 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4054 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
4055 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4056 (SUBREG_TO_REG (i32 0),
4057 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4058 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
4060 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4061 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4062 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
4063 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4064 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4065 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
4067 // Extract and store.
4068 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4070 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4072 // Shuffle with VMOVSS
4073 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4074 (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4076 def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4077 (VMOVSSZrr VR128X:$src1,
4078 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
4080 // Shuffle with VMOVSD
4081 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4082 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4084 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4085 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
4087 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4088 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4089 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4090 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
// vmovq xmm,xmm: zero-extend the low i64 of the source into the destination
// (X86vzmovl on v2i64), EVEX-encoded with VEX_W.
4093 let AddedComplexity = 15 in
4094 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4096 "vmovq\t{$src, $dst|$dst, $src}",
4097 [(set VR128X:$dst, (v2i64 (X86vzmovl
4098 (v2i64 VR128X:$src))))],
4099 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
// ISel patterns selecting the EVEX movd/movq forms (VMOVDI2PDIZ*,
// VMOV64toPQIZrr, VMOVQI2PQIZrm) for GPR->vector zero-extending moves and
// zero-extending integer loads, including the 256/512-bit widened forms
// built with SUBREG_TO_REG.
4101 let Predicates = [HasAVX512] in {
4102 let AddedComplexity = 15 in {
4103 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4104 (VMOVDI2PDIZrr GR32:$src)>;
4106 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4107 (VMOV64toPQIZrr GR64:$src)>;
4109 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4110 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4111 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4113 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4114 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4115 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4117 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4118 let AddedComplexity = 20 in {
4119 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4120 (VMOVDI2PDIZrm addr:$src)>;
4121 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4122 (VMOVDI2PDIZrm addr:$src)>;
4123 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4124 (VMOVDI2PDIZrm addr:$src)>;
4125 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4126 (VMOVDI2PDIZrm addr:$src)>;
4127 def : Pat<(v4i32 (X86vzload addr:$src)),
4128 (VMOVDI2PDIZrm addr:$src)>;
4129 def : Pat<(v8i32 (X86vzload addr:$src)),
4130 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4131 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4132 (VMOVQI2PQIZrm addr:$src)>;
4133 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4134 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4135 def : Pat<(v2i64 (X86vzload addr:$src)),
4136 (VMOVQI2PQIZrm addr:$src)>;
4137 def : Pat<(v4i64 (X86vzload addr:$src)),
4138 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4141 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4142 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4143 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4144 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4145 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4146 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4147 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4149 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4150 def : Pat<(v16i32 (X86vzload addr:$src)),
4151 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4152 def : Pat<(v8i64 (X86vzload addr:$src)),
4153 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4155 //===----------------------------------------------------------------------===//
4156 // AVX-512 - Non-temporals
4157 //===----------------------------------------------------------------------===//
// Non-temporal aligned loads (vmovntdqa) at 512/256/128-bit widths.  The
// 256/128-bit forms require VLX.  No ISel patterns here ([]); the load
// patterns are defined separately below.
4158 let SchedRW = [WriteLoad] in {
4159 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4160 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4161 [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
4162 EVEX_CD8<64, CD8VF>;
4164 let Predicates = [HasVLX] in {
4165 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4167 "vmovntdqa\t{$src, $dst|$dst, $src}",
4168 [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
4169 EVEX_CD8<64, CD8VF>;
4171 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4173 "vmovntdqa\t{$src, $dst|$dst, $src}",
4174 [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
4175 EVEX_CD8<64, CD8VF>;
// One non-temporal store instruction (mr form) for a single vector width,
// matching `st_frag` (aligned non-temporal store by default).  The high
// AddedComplexity prefers it over ordinary store patterns.
4179 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4180 PatFrag st_frag = alignednontemporalstore,
4181 InstrItinClass itin = IIC_SSE_MOVNT> {
4182 let SchedRW = [WriteStore], AddedComplexity = 400 in
4183 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4184 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4185 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4186 _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiate avx512_movnt at all three vector lengths; the 256/128-bit
// forms additionally require VLX.
4189 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4190 AVX512VLVectorVTInfo VTInfo> {
4191 let Predicates = [HasAVX512] in
4192 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
4194 let Predicates = [HasAVX512, HasVLX] in {
4195 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4196 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
// Non-temporal store instructions, plus extra 512-bit patterns mapping the
// remaining element types onto VMOVNTDQZmr / VMOVNTDQAZrm.
4200 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4201 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4202 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
4204 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4205 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4206 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4207 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4208 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4209 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4210 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
// Non-temporal aligned loads for all 512-bit types use vmovntdqa.
4212 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4213 (VMOVNTDQAZrm addr:$src)>;
4214 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4215 (VMOVNTDQAZrm addr:$src)>;
4216 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4217 (VMOVNTDQAZrm addr:$src)>;
// VLX equivalents of the patterns above, at 256- and 128-bit widths.
4220 let Predicates = [HasVLX], AddedComplexity = 400 in {
4221 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4222 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4223 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4224 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4225 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4226 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4228 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4229 (VMOVNTDQAZ256rm addr:$src)>;
4230 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4231 (VMOVNTDQAZ256rm addr:$src)>;
4232 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4233 (VMOVNTDQAZ256rm addr:$src)>;
4235 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4236 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4237 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4238 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4239 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4240 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4242 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4243 (VMOVNTDQAZ128rm addr:$src)>;
4244 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4245 (VMOVNTDQAZ128rm addr:$src)>;
4246 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4247 (VMOVNTDQAZ128rm addr:$src)>;
4250 //===----------------------------------------------------------------------===//
4251 // AVX-512 - Integer arithmetic
// Base multiclass for an integer binary op: register-register (rr) and
// register-memory (rm) forms, with EVEX masking via AVX512_maskable.
4253 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4254 X86VectorVTInfo _, OpndItins itins,
4255 bit IsCommutable = 0> {
4256 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4257 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4258 "$src2, $src1", "$src1, $src2",
4259 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4260 itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4261 Sched<[itins.Sched]>;
// Memory form: the loaded value is bitconverted to the op's vector type.
4263 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4264 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4265 "$src2, $src1", "$src1, $src2",
4266 (_.VT (OpNode _.RC:$src1,
4267 (bitconvert (_.LdFrag addr:$src2)))),
4268 itins.rm>, AVX512BIBase, EVEX_4V,
4269 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Extends avx512_binop_rm with a broadcast-from-memory (rmb, EVEX.b) form:
// a scalar element is loaded and broadcast as the second operand.
4272 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4273 X86VectorVTInfo _, OpndItins itins,
4274 bit IsCommutable = 0> :
4275 avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
4276 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4277 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4278 "${src2}"##_.BroadcastStr##", $src1",
4279 "$src1, ${src2}"##_.BroadcastStr,
4280 (_.VT (OpNode _.RC:$src1,
4282 (_.ScalarLdFrag addr:$src2)))),
4283 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4284 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_binop_rm at 512/256/128-bit widths; the 256/128-bit
// forms additionally require VLX.
4287 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4288 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4289 Predicate prd, bit IsCommutable = 0> {
4290 let Predicates = [prd] in
4291 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4292 IsCommutable>, EVEX_V512;
4294 let Predicates = [prd, HasVLX] in {
4295 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4296 IsCommutable>, EVEX_V256;
4297 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4298 IsCommutable>, EVEX_V128;
// Same as avx512_binop_rm_vl, but using the broadcast-capable
// avx512_binop_rmb base (for 32/64-bit element ops).
4302 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4303 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4304 Predicate prd, bit IsCommutable = 0> {
4305 let Predicates = [prd] in
4306 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4307 IsCommutable>, EVEX_V512;
4309 let Predicates = [prd, HasVLX] in {
4310 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4311 IsCommutable>, EVEX_V256;
4312 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4313 IsCommutable>, EVEX_V128;
// i64-element binop: broadcast-capable, VEX_W, CD8 scale 64.
4317 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4318 OpndItins itins, Predicate prd,
4319 bit IsCommutable = 0> {
4320 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4321 itins, prd, IsCommutable>,
4322 VEX_W, EVEX_CD8<64, CD8VF>;
// i32-element binop: broadcast-capable, CD8 scale 32.
4325 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4326 OpndItins itins, Predicate prd,
4327 bit IsCommutable = 0> {
4328 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4329 itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// i16-element binop: no broadcast form (uses avx512_binop_rm_vl).
4332 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4333 OpndItins itins, Predicate prd,
4334 bit IsCommutable = 0> {
4335 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4336 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// i8-element binop: no broadcast form (uses avx512_binop_rm_vl).
4340 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4341 OpndItins itins, Predicate prd,
4342 bit IsCommutable = 0> {
4343 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4344 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Pairs the d/q element-size variants under "<OpcodeStr>d"/"<OpcodeStr>q".
4348 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4349 SDNode OpNode, OpndItins itins, Predicate prd,
4350 bit IsCommutable = 0> {
4351 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
4354 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
// Pairs the b/w element-size variants under "<OpcodeStr>b"/"<OpcodeStr>w".
4358 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4359 SDNode OpNode, OpndItins itins, Predicate prd,
4360 bit IsCommutable = 0> {
4361 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
4364 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
// All four element sizes: d/q under HasAVX512, b/w under HasBWI.
4368 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4369 bits<8> opc_d, bits<8> opc_q,
4370 string OpcodeStr, SDNode OpNode,
4371 OpndItins itins, bit IsCommutable = 0> {
4372 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4373 itins, HasAVX512, IsCommutable>,
4374 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4375 itins, HasBWI, IsCommutable>;
// Binop whose source and destination vector types differ (e.g. pmuludq:
// i32 sources, i64 destination).  Provides rr, rm, and a broadcast rmb form
// whose memory element type comes from a third VT info (_Brdct).
4378 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
4379 SDNode OpNode,X86VectorVTInfo _Src,
4380 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4381 bit IsCommutable = 0> {
4382 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4383 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4384 "$src2, $src1","$src1, $src2",
4386 (_Src.VT _Src.RC:$src1),
4387 (_Src.VT _Src.RC:$src2))),
4388 itins.rr, IsCommutable>,
4389 AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
4390 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4391 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4392 "$src2, $src1", "$src1, $src2",
4393 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4394 (bitconvert (_Src.LdFrag addr:$src2)))),
4395 itins.rm>, AVX512BIBase, EVEX_4V,
4396 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast form: a _Brdct scalar element is loaded, broadcast, then
// bitconverted to the source vector type.
4398 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4399 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4401 "${src2}"##_Brdct.BroadcastStr##", $src1",
4402 "$src1, ${src2}"##_Brdct.BroadcastStr,
4403 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4404 (_Brdct.VT (X86VBroadcast
4405 (_Brdct.ScalarLdFrag addr:$src2)))))),
4406 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4407 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Integer add/sub (wrapping, signed/unsigned saturating), multiplies, and
// averages across the element sizes supported by each feature predicate.
4410 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4411 SSE_INTALU_ITINS_P, 1>;
4412 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4413 SSE_INTALU_ITINS_P, 0>;
4414 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4415 SSE_INTALU_ITINS_P, HasBWI, 1>;
4416 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4417 SSE_INTALU_ITINS_P, HasBWI, 0>;
4418 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4419 SSE_INTALU_ITINS_P, HasBWI, 1>;
4420 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4421 SSE_INTALU_ITINS_P, HasBWI, 0>;
4422 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4423 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4424 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4425 SSE_INTALU_ITINS_P, HasBWI, 1>;
4426 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4427 SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
4428 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
4430 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
4432 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
4434 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4435 SSE_INTALU_ITINS_P, HasBWI, 1>;
// Instantiates avx512_binop_rm2 at all three widths, with the broadcast
// element type fixed to i64 (v8i64/v4i64/v2i64) and VEX_W set.
4437 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
4438 AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4439 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4440 let Predicates = [prd] in
4441 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4442 _SrcVTInfo.info512, _DstVTInfo.info512,
4443 v8i64_info, IsCommutable>,
4444 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4445 let Predicates = [HasVLX, prd] in {
4446 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4447 _SrcVTInfo.info256, _DstVTInfo.info256,
4448 v4i64x_info, IsCommutable>,
4449 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4450 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4451 _SrcVTInfo.info128, _DstVTInfo.info128,
4452 v2i64x_info, IsCommutable>,
4453 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// Widening multiplies (i32 -> i64) and VBMI multishift.
4457 defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
4458 avx512vl_i32_info, avx512vl_i64_info,
4459 X86pmuldq, HasAVX512, 1>,T8PD;
4460 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
4461 avx512vl_i32_info, avx512vl_i64_info,
4462 X86pmuludq, HasAVX512, 1>;
4463 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4464 avx512vl_i8_info, avx512vl_i8_info,
4465 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast (EVEX.b) form for pack instructions: a scalar source element is
// loaded and broadcast to the full source vector before packing.
4467 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4468 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4470 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4471 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4473 "${src2}"##_Src.BroadcastStr##", $src1",
4474 "$src1, ${src2}"##_Src.BroadcastStr,
4475 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4476 (_Src.VT (X86VBroadcast
4477 (_Src.ScalarLdFrag addr:$src2)))))),
4478 itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4479 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// rr and rm forms for pack-style ops whose source and destination vector
// types differ (e.g. v16i32 -> v32i16).
4482 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4483 SDNode OpNode,X86VectorVTInfo _Src,
4484 X86VectorVTInfo _Dst, OpndItins itins,
4485 bit IsCommutable = 0> {
4486 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4487 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4488 "$src2, $src1","$src1, $src2",
4490 (_Src.VT _Src.RC:$src1),
4491 (_Src.VT _Src.RC:$src2))),
4492 itins.rr, IsCommutable>,
4493 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
4494 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4495 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4496 "$src2, $src1", "$src1, $src2",
4497 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4498 (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
4499 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4500 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// i32 -> i16 packs (packssdw/packusdw) at all widths; the dword source
// element allows a broadcast form, so both rm and rmb are instantiated.
4503 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4505 let Predicates = [HasBWI] in
4506 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4507 v32i16_info, SSE_PACK>,
4508 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4509 v32i16_info, SSE_PACK>, EVEX_V512;
4510 let Predicates = [HasBWI, HasVLX] in {
4511 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4512 v16i16x_info, SSE_PACK>,
4513 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4514 v16i16x_info, SSE_PACK>, EVEX_V256;
4515 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4516 v8i16x_info, SSE_PACK>,
4517 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4518 v8i16x_info, SSE_PACK>, EVEX_V128;
// i16 -> i8 packs (packsswb/packuswb); word elements have no broadcast
// form, so only avx512_packs_rm is instantiated.
4521 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4523 let Predicates = [HasBWI] in
4524 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
4525 v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
4526 let Predicates = [HasBWI, HasVLX] in {
4527 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4528 v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
4529 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4530 v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
// vpmaddubsw / vpmaddwd: multiply-add ops with narrowing source->dest
// types, instantiated at all widths under HasBWI (+VLX for 256/128).
4534 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4535 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4536 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4537 let Predicates = [HasBWI] in
4538 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4539 _Dst.info512, SSE_PMADD, IsCommutable>, EVEX_V512;
4540 let Predicates = [HasBWI, HasVLX] in {
4541 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4542 _Dst.info256, SSE_PMADD, IsCommutable>, EVEX_V256;
4543 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4544 _Dst.info128, SSE_PMADD, IsCommutable>, EVEX_V128;
// Pack, multiply-add, and signed/unsigned min/max instruction definitions.
4548 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4549 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4550 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4551 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4553 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4554 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4555 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4556 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Signed max, then unsigned max, then signed min, then unsigned min,
// each for b/w (HasBWI) and d/q (HasAVX512) element sizes.
4558 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4559 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4560 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4561 SSE_INTALU_ITINS_P, HasBWI, 1>;
4562 defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
4563 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4565 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4566 SSE_INTALU_ITINS_P, HasBWI, 1>;
4567 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4568 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4569 defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
4570 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4572 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4573 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4574 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4575 SSE_INTALU_ITINS_P, HasBWI, 1>;
4576 defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
4577 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4579 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4580 SSE_INTALU_ITINS_P, HasBWI, 1>;
4581 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4582 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4583 defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
4584 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
// With DQI but no VLX, implement 128/256-bit vpmullq by widening both
// operands into an undef 512-bit register and using the 512-bit multiply.
// NOTE(review): this block appears twice back-to-back in identical visible
// form (content lines 4586-4601 and 4603-4619).  The second copy looks like
// a redundant duplicate that could be deleted -- confirm against history
// before removing, since the result-instruction lines are not visible here.
4586 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4587 let Predicates = [HasDQI, NoVLX] in {
4588 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4591 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4592 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4595 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4598 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4599 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4603 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4604 let Predicates = [HasDQI, NoVLX] in {
4605 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4608 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4609 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4612 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4615 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4616 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Lower 256/128-bit v*i64 min/max to the given 512-bit instruction by
// widening both operands with INSERT_SUBREG into an undef ZMM register.
4620 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4621 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4624 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4625 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4628 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4631 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4632 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Instantiate for the four v8i64 min/max instructions.
// NOTE(review): the PMULLQ block above guards its widening patterns with
// NoVLX; consider whether these should be guarded the same way -- confirm.
4636 let Predicates = [HasAVX512] in {
4637 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4638 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4639 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4640 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4643 //===----------------------------------------------------------------------===//
4644 // AVX-512 Logical Instructions
4645 //===----------------------------------------------------------------------===//
4647 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
4648 // be set to null_frag for 32-bit elements.
// Register-register and register-memory forms of the packed logical ops
// (and/or/xor/andn). `OpNode` matches the un-masked pattern and is
// null_frag for 32-bit elements (see note above); `OpNodeMsk` is always the
// real node and is used for the masked pattern, where element size matters.
// Both patterns operate on the i64 vector type via bitconvert.
4649 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
4650 SDPatternOperator OpNode,
4651 SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4652 bit IsCommutable = 0> {
4653 let hasSideEffects = 0 in
// rr: both operands in registers.
4654 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
4655 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4656 "$src2, $src1", "$src1, $src2",
4657 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4658 (bitconvert (_.VT _.RC:$src2)))),
4659 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4661 itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4662 Sched<[itins.Sched]>;
// rm: second operand loaded from memory.
4664 let hasSideEffects = 0, mayLoad = 1 in
4665 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4666 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4667 "$src2, $src1", "$src1, $src2",
4668 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4669 (bitconvert (_.LdFrag addr:$src2)))),
4670 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4671 (bitconvert (_.LdFrag addr:$src2)))))),
4672 itins.rm>, AVX512BIBase, EVEX_4V,
4673 Sched<[itins.Sched.Folded, ReadAfterLd]>;
4676 // OpNodeMsk is the OpNode to use where element size is important. So use
4677 // for all of the broadcast patterns.
// Adds the broadcast-from-memory (rmb, EVEX.b) form on top of
// avx512_logic_rm. Broadcast patterns always use OpNodeMsk because the
// broadcast element size is significant.
4678 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
4679 SDPatternOperator OpNode,
4680 SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4681 bit IsCommutable = 0> :
4682 avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
4684 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4685 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4686 "${src2}"##_.BroadcastStr##", $src1",
4687 "$src1, ${src2}"##_.BroadcastStr,
4688 (_.i64VT (OpNodeMsk _.RC:$src1,
4690 (_.VT (X86VBroadcast
4691 (_.ScalarLdFrag addr:$src2)))))),
4692 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4694 (_.VT (X86VBroadcast
4695 (_.ScalarLdFrag addr:$src2)))))))),
4696 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4697 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Expand the logical-op forms over the three EVEX vector lengths:
// 512-bit under AVX512F, 256/128-bit additionally requiring AVX512VL.
4700 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
4701 SDPatternOperator OpNode,
4702 SDNode OpNodeMsk, OpndItins itins,
4703 AVX512VLVectorVTInfo VTInfo,
4704 bit IsCommutable = 0> {
4705 let Predicates = [HasAVX512] in
4706 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4707 VTInfo.info512, IsCommutable>, EVEX_V512;
4709 let Predicates = [HasAVX512, HasVLX] in {
4710 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4711 VTInfo.info256, IsCommutable>, EVEX_V256;
4712 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4713 VTInfo.info128, IsCommutable>, EVEX_V128;
// Dword + qword variants of a logical op. The D form passes null_frag as
// OpNode (the un-masked 32-bit pattern is not matched directly; only the
// masked/broadcast OpNode is used), while the Q form matches both.
4717 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4718 SDNode OpNode, OpndItins itins,
4719 bit IsCommutable = 0> {
4720 defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
4721 avx512vl_i64_info, IsCommutable>,
4722 VEX_W, EVEX_CD8<64, CD8VF>;
4723 defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
4724 avx512vl_i32_info, IsCommutable>,
4725 EVEX_CD8<32, CD8VF>;
// The four packed integer logical operations. AND/OR/XOR are commutable;
// ANDN (X86andnp) is not.
4728 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>;
4729 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>;
4730 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>;
4731 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>;
4733 //===----------------------------------------------------------------------===//
4734 // AVX-512 FP arithmetic
4735 //===----------------------------------------------------------------------===//
// Scalar FP binary op: intrinsic (_Int, xmm-typed, maskable, current
// rounding mode) forms plus isCodeGenOnly FRC-typed forms that match the
// plain scalar DAG node (OpNode). VecNode is the intrinsic-style node
// carrying an explicit rounding-mode operand.
4736 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4737 SDNode OpNode, SDNode VecNode, OpndItins itins,
4739 let ExeDomain = _.ExeDomain in {
4740 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4741 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4742 "$src2, $src1", "$src1, $src2",
4743 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
4744 (i32 FROUND_CURRENT))),
4745 itins.rr>, Sched<[itins.Sched]>;
4747 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4748 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4749 "$src2, $src1", "$src1, $src2",
4750 (_.VT (VecNode _.RC:$src1,
4751 _.ScalarIntMemCPat:$src2,
4752 (i32 FROUND_CURRENT))),
4753 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Plain scalar-register forms used only for ISel of scalar FP math.
4754 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4755 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4756 (ins _.FRC:$src1, _.FRC:$src2),
4757 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4758 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4759 itins.rr>, Sched<[itins.Sched]> {
4760 let isCommutable = IsCommutable;
4762 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4763 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4764 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4765 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4766 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4767 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Scalar FP op with explicit embedded rounding control: rrb form takes an
// AVX512RC rounding-mode immediate and is encoded with EVEX.b + EVEX.RC.
4772 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4773 SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
4774 let ExeDomain = _.ExeDomain in
4775 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4776 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
4777 "$rc, $src2, $src1", "$src1, $src2, $rc",
4778 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4779 (i32 imm:$rc)), itins.rr, IsCommutable>,
4780 EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Scalar FP op with {sae} (suppress-all-exceptions) support: regular _Int
// forms without a rounding operand, the isCodeGenOnly FRC forms, and an
// rrb form encoding {sae} via EVEX.b with FROUND_NO_EXC.
4782 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4783 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
4784 OpndItins itins, bit IsCommutable> {
4785 let ExeDomain = _.ExeDomain in {
4786 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4787 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4788 "$src2, $src1", "$src1, $src2",
4789 (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
4790 itins.rr>, Sched<[itins.Sched]>;
4792 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4793 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4794 "$src2, $src1", "$src1, $src2",
4795 (_.VT (VecNode _.RC:$src1,
4796 _.ScalarIntMemCPat:$src2)),
4797 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4799 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4800 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4801 (ins _.FRC:$src1, _.FRC:$src2),
4802 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4803 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4804 itins.rr>, Sched<[itins.Sched]> {
4805 let isCommutable = IsCommutable;
4807 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4808 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4809 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4810 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4811 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4812 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// {sae} form: no exception flags are raised; rounding operand fixed to
// FROUND_NO_EXC.
4815 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4816 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4817 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4818 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4819 (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
4820 Sched<[itins.Sched]>;
// Instantiate a scalar FP binop for both SS (f32, XS prefix) and SD
// (f64, XD + VEX.W) with embedded-rounding support.
4824 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4826 SizeItins itins, bit IsCommutable> {
4827 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
4828 itins.s, IsCommutable>,
4829 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
4830 itins.s, IsCommutable>,
4831 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4832 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
4833 itins.d, IsCommutable>,
4834 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
4835 itins.d, IsCommutable>,
4836 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Same, but with {sae} support instead of rounding control (used by
// min/max, which do not round).
4839 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
4840 SDNode VecNode, SDNode SaeNode,
4841 SizeItins itins, bit IsCommutable> {
4842 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
4843 VecNode, SaeNode, itins.s, IsCommutable>,
4844 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4845 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
4846 VecNode, SaeNode, itins.d, IsCommutable>,
4847 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar add/mul are commutable; sub/div and IEEE min/max are not
// (min/max are order-sensitive w.r.t. NaN and signed zero).
4849 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
4850 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
4851 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
4852 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
4853 defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
4854 SSE_ALU_ITINS_S, 0>;
4855 defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
4856 SSE_ALU_ITINS_S, 0>;
4858 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
4859 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Commutable scalar min/max (X86fminc/X86fmaxc) forms, selected under
// unsafe-fp-math where operand order does not matter (see comment above).
// NOTE(review): the name "comutable" is a historical typo; renaming would
// break the defm instantiations below, so it is kept.
4860 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
4861 X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
4862 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
4863 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4864 (ins _.FRC:$src1, _.FRC:$src2),
4865 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4866 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4867 itins.rr>, Sched<[itins.Sched]> {
4868 let isCommutable = 1;
4870 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4871 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4872 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4873 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4874 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4875 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Commutable variants of scalar min/max for both element sizes. These
// reuse the vminss/vminsd/vmaxss/vmaxsd mnemonics and opcodes.
4878 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
4879 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4880 EVEX_CD8<32, CD8VT1>;
4882 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
4883 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4884 EVEX_CD8<64, CD8VT1>;
4886 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
4887 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4888 EVEX_CD8<32, CD8VT1>;
4890 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
4891 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4892 EVEX_CD8<64, CD8VT1>;
// Packed FP binary op: rr, rm (full-vector load), and rmb (broadcast) forms,
// all maskable. The mnemonic is OpcodeStr plus the VT-derived suffix.
4894 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4895 X86VectorVTInfo _, OpndItins itins,
4897 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
4898 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4899 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4900 "$src2, $src1", "$src1, $src2",
4901 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
4902 IsCommutable>, EVEX_4V, Sched<[itins.Sched]>;
4903 let mayLoad = 1 in {
4904 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4905 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
4906 "$src2, $src1", "$src1, $src2",
4907 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
4908 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast form (EVEX.b): the scalar memory operand is splatted across
// all elements.
4909 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4910 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
4911 "${src2}"##_.BroadcastStr##", $src1",
4912 "$src1, ${src2}"##_.BroadcastStr,
4913 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
4914 (_.ScalarLdFrag addr:$src2)))),
4915 itins.rm>, EVEX_4V, EVEX_B,
4916 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Packed FP op with embedded rounding control (rrb form; EVEX.b + EVEX.RC,
// register operands only).
4921 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4922 OpndItins itins, X86VectorVTInfo _> {
4923 let ExeDomain = _.ExeDomain in
4924 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4925 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
4926 "$rc, $src2, $src1", "$src1, $src2, $rc",
4927 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))), itins.rr>,
4928 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Packed FP op with {sae}: rounding operand fixed to FROUND_NO_EXC.
4931 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4932 OpndItins itins, X86VectorVTInfo _> {
4933 let ExeDomain = _.ExeDomain in
4934 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4935 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4936 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4937 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>,
4938 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
// Expand a packed FP binop across PS/PD and the three vector lengths.
// 512-bit forms require only `prd`; 128/256-bit forms additionally
// require AVX512VL.
4941 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4942 Predicate prd, SizeItins itins,
4943 bit IsCommutable = 0> {
4944 let Predicates = [prd] in {
4945 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
4946 itins.s, IsCommutable>, EVEX_V512, PS,
4947 EVEX_CD8<32, CD8VF>;
4948 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
4949 itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
4950 EVEX_CD8<64, CD8VF>;
4953 // Define only if AVX512VL feature is present.
4954 let Predicates = [prd, HasVLX] in {
4955 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
4956 itins.s, IsCommutable>, EVEX_V128, PS,
4957 EVEX_CD8<32, CD8VF>;
4958 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
4959 itins.s, IsCommutable>, EVEX_V256, PS,
4960 EVEX_CD8<32, CD8VF>;
4961 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
4962 itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
4963 EVEX_CD8<64, CD8VF>;
4964 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
4965 itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
4966 EVEX_CD8<64, CD8VF>;
// 512-bit-only PS/PD expansion of the embedded-rounding form.
4970 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4972 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4973 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4974 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4975 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// 512-bit-only PS/PD expansion of the {sae} form.
4978 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4980 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4981 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4982 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4983 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic: add/mul/sub/div get embedded rounding; min/max get
// {sae}. Add/mul are commutable.
4986 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
4987 SSE_ALU_ITINS_P, 1>,
4988 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
4989 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
4990 SSE_MUL_ITINS_P, 1>,
4991 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
4992 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
4993 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
4994 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
4995 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
4996 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
4997 SSE_ALU_ITINS_P, 0>,
4998 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
4999 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5000 SSE_ALU_ITINS_P, 0>,
5001 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;
// Commutable min/max variants (unsafe-fp-math), codegen-only.
5002 let isCodeGenOnly = 1 in {
5003 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5004 SSE_ALU_ITINS_P, 1>;
5005 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5006 SSE_ALU_ITINS_P, 1>;
// FP logical instructions (AVX512DQ). null_frag: no direct DAG pattern;
// selection goes through the lowering patterns below.
5008 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5009 SSE_ALU_ITINS_P, 1>;
5010 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5011 SSE_ALU_ITINS_P, 0>;
5012 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5013 SSE_ALU_ITINS_P, 1>;
5014 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5015 SSE_ALU_ITINS_P, 1>;
5017 // Patterns catch floating point selects with bitcasted integer logic ops.
// Select masked/broadcast variants of the integer logical instructions for
// FP-typed vselects over bitcasted integer logic (see comment above).
// InstrStr names the integer instruction family to instantiate (e.g. VPAND).
5018 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
5019 X86VectorVTInfo _, Predicate prd> {
5020 let Predicates = [prd] in {
5021 // Masked register-register logical operations.
5022 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5023 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5025 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5026 _.RC:$src1, _.RC:$src2)>;
5027 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5028 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5030 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5032 // Masked register-memory logical operations.
5033 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5034 (bitconvert (_.i64VT (OpNode _.RC:$src1,
5035 (load addr:$src2)))),
5037 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5038 _.RC:$src1, addr:$src2)>;
5039 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5040 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
5042 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5044 // Register-broadcast logical operations.
5045 def : Pat<(_.i64VT (OpNode _.RC:$src1,
5046 (bitconvert (_.VT (X86VBroadcast
5047 (_.ScalarLdFrag addr:$src2)))))),
5048 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5049 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5051 (_.i64VT (OpNode _.RC:$src1,
5054 (_.ScalarLdFrag addr:$src2))))))),
5056 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5057 _.RC:$src1, addr:$src2)>;
5058 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5060 (_.i64VT (OpNode _.RC:$src1,
5063 (_.ScalarLdFrag addr:$src2))))))),
5065 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5066 _.RC:$src1, addr:$src2)>;
// Instantiate the FP logical lowering patterns over every FP vector type:
// D (f32 element) and Q (f64 element) at each vector length; VLX guards the
// 128/256-bit sizes.
5070 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
5071 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5072 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5073 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5074 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5075 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5076 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
// Hook the four logical operations up to the integer VPAND/VPOR/VPXOR/
// VPANDN instruction families.
5079 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5080 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5081 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5082 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
// Scalar f32/f64 logical ops have no dedicated instruction: copy the scalar
// into a 128-bit register, run the packed VANDP*/VORP*/VXORP*/VANDNP*Z128
// instruction, and copy back. Requires both VLX (for the 128-bit forms) and
// DQI (the FP logical instructions).
5084 let Predicates = [HasVLX,HasDQI] in {
5085 // Use packed logical operations for scalar ops.
5086 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5087 (COPY_TO_REGCLASS (VANDPDZ128rr
5088 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5089 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5090 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5091 (COPY_TO_REGCLASS (VORPDZ128rr
5092 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5093 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5094 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5095 (COPY_TO_REGCLASS (VXORPDZ128rr
5096 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5097 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5098 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5099 (COPY_TO_REGCLASS (VANDNPDZ128rr
5100 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5101 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
// Same four patterns for f32.
5103 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5104 (COPY_TO_REGCLASS (VANDPSZ128rr
5105 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5106 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5107 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5108 (COPY_TO_REGCLASS (VORPSZ128rr
5109 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5110 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5111 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5112 (COPY_TO_REGCLASS (VXORPSZ128rr
5113 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5114 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5115 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5116 (COPY_TO_REGCLASS (VANDNPSZ128rr
5117 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5118 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
// VSCALEF packed forms: rr/rm/rmb, all carrying a FROUND_CURRENT rounding
// operand (the node takes an explicit rounding-mode argument).
5121 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5122 OpndItins itins, X86VectorVTInfo _> {
5123 let ExeDomain = _.ExeDomain in {
5124 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5125 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5126 "$src2, $src1", "$src1, $src2",
5127 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
5128 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
5129 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5130 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5131 "$src2, $src1", "$src1, $src2",
5132 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT)),
5133 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5134 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5135 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5136 "${src2}"##_.BroadcastStr##", $src1",
5137 "$src1, ${src2}"##_.BroadcastStr,
5138 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5139 (_.ScalarLdFrag addr:$src2))),
5140 (i32 FROUND_CURRENT)), itins.rm>,
5141 EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// VSCALEFSS/SD scalar forms (rr and intrinsic-style memory operand).
5145 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5146 OpndItins itins, X86VectorVTInfo _> {
5147 let ExeDomain = _.ExeDomain in {
5148 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5149 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5150 "$src2, $src1", "$src1, $src2",
5151 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), itins.rr>,
5152 Sched<[itins.Sched]>;
5153 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5154 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5155 "$src2, $src1", "$src1, $src2",
5156 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5157 (i32 FROUND_CURRENT)), itins.rm>,
5158 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Full VSCALEF expansion: 512-bit packed (with rounding), scalar SS/SD
// (with rounding), and VLX-gated 128/256-bit packed forms.
5162 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
5163 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5164 avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5165 EVEX_V512, EVEX_CD8<32, CD8VF>;
5166 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5167 avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5168 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5169 defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F32S, f32x_info>,
5170 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
5171 EVEX_4V,EVEX_CD8<32, CD8VT1>;
5172 defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F64S, f64x_info>,
5173 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
5174 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5176 // Define only if AVX512VL feature is present.
5177 let Predicates = [HasVLX] in {
5178 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v4f32x_info>,
5179 EVEX_V128, EVEX_CD8<32, CD8VF>;
5180 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v8f32x_info>,
5181 EVEX_V256, EVEX_CD8<32, CD8VF>;
5182 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v2f64x_info>,
5183 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5184 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v4f64x_info>,
5185 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF: packed opcode 0x2C, scalar opcode 0x2D, both in the 0F38 map.
5188 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
5190 //===----------------------------------------------------------------------===//
5191 // AVX-512 VPTESTM instructions
5192 //===----------------------------------------------------------------------===//
// VPTESTM/VPTESTNM: test instructions producing a mask register result.
// rr and rm forms; result goes to the mask register class (_.KRC).
5194 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
5195 OpndItins itins, X86VectorVTInfo _> {
5196 let ExeDomain = _.ExeDomain in {
5197 let isCommutable = 1 in
5198 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5199 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5200 "$src2, $src1", "$src1, $src2",
5201 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
5202 EVEX_4V, Sched<[itins.Sched]>;
5203 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5204 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5205 "$src2, $src1", "$src1, $src2",
5206 (OpNode (_.VT _.RC:$src1),
5207 (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
5208 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5209 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast (EVEX.b) memory form of the vptest instructions.
5213 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5214 OpndItins itins, X86VectorVTInfo _> {
5215 let ExeDomain = _.ExeDomain in
5216 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5217 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5218 "${src2}"##_.BroadcastStr##", $src1",
5219 "$src1, ${src2}"##_.BroadcastStr,
5220 (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
5221 (_.ScalarLdFrag addr:$src2)))),
5222 itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5223 Sched<[itins.Sched.Folded, ReadAfterLd]>;
5226 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen operands via INSERT_SUBREG into an IMPLICIT_DEF zmm, run the
// 512-bit instruction, and copy the resulting mask to the narrow KRC.
5227 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
5228 X86VectorVTInfo _, string Suffix> {
5229 def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
5230 (_.KVT (COPY_TO_REGCLASS
5231 (!cast<Instruction>(NAME # Suffix # "Zrr")
5232 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5233 _.RC:$src1, _.SubRegIdx),
5234 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5235 _.RC:$src2, _.SubRegIdx)),
// Expand vptest for one dword/qword element size over the vector lengths,
// plus the NoVLX widening fallbacks for 128/256-bit.
5239 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5240 OpndItins itins, AVX512VLVectorVTInfo _,
5242 let Predicates = [HasAVX512] in
5243 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
5244 avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5246 let Predicates = [HasAVX512, HasVLX] in {
5247 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
5248 avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
5249 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
5250 avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5252 let Predicates = [HasAVX512, NoVLX] in {
5253 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
5254 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
// D and Q element-size instantiations; Q carries VEX.W.
5258 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
5260 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
5261 avx512vl_i32_info, "D">;
5262 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
5263 avx512vl_i64_info, "Q">, VEX_W;
// Byte/word vptest variants: gated on AVX512BW (plus VLX for the narrow
// sizes), with NoVLX widening fallbacks mirroring the dq path.
5266 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5267 SDNode OpNode, OpndItins itins> {
5268 let Predicates = [HasBWI] in {
5269 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
5271 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
5274 let Predicates = [HasVLX, HasBWI] in {
5276 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
5278 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
5280 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
5282 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
5286 let Predicates = [HasAVX512, NoVLX] in {
5287 defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
5288 defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
5289 defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
5290 defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
// All element sizes: wb forms use opc_wb, dq forms use opc_dq.
5294 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5295 SDNode OpNode, OpndItins itins> :
5296 avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
5297 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
// VPTESTM (T8PD map) and VPTESTNM (T8XS map) share opcodes 0x26/0x27 and
// differ only in prefix and DAG node.
5299 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
5300 SSE_BIT_ITINS_P>, T8PD;
5301 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
5302 SSE_BIT_ITINS_P>, T8XS;
5305 //===----------------------------------------------------------------------===//
5306 // AVX-512 Shift instructions
5307 //===----------------------------------------------------------------------===//
// Shift-by-immediate: ri (register source) and mi (memory source) forms;
// the shift amount is an 8-bit immediate.
5308 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5309 string OpcodeStr, SDNode OpNode, OpndItins itins,
5310 X86VectorVTInfo _> {
5311 let ExeDomain = _.ExeDomain in {
5312 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5313 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5314 "$src2, $src1", "$src1, $src2",
5315 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
5316 itins.rr>, Sched<[itins.Sched]>;
5317 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5318 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5319 "$src2, $src1", "$src1, $src2",
5320 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5322 itins.rm>, Sched<[itins.Sched.Folded]>;
// Shift-by-immediate with a broadcast memory source (mbi, EVEX.b).
5326 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5327 string OpcodeStr, SDNode OpNode, OpndItins itins,
5328 X86VectorVTInfo _> {
5329 let ExeDomain = _.ExeDomain in
5330 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5331 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5332 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5333 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
5334 itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Shift by a vector count: the count (src2) is always a 128-bit xmm value
// regardless of the destination vector length.
5337 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5338 OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5339 X86VectorVTInfo _> {
5340 // src2 is always 128-bit
5341 let ExeDomain = _.ExeDomain in {
5342 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5343 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5344 "$src2, $src1", "$src1, $src2",
5345 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
5346 itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
5347 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5348 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5349 "$src2, $src1", "$src1, $src2",
5350 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
5351 itins.rm>, AVX512BIBase,
5352 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Expand over vector lengths; note the memory-tuple scaling differs per
// size (CD8VQ at 512, CD8VH at 256, CD8VF at 128) because the count
// operand stays 128-bit.
5356 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5357 OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5358 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
5359 let Predicates = [prd] in
5360 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5361 VTInfo.info512>, EVEX_V512,
5362 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5363 let Predicates = [prd, HasVLX] in {
5364 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5365 VTInfo.info256>, EVEX_V256,
5366 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5367 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5368 VTInfo.info128>, EVEX_V128,
5369 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// Instantiates the D/Q/W shift-by-xmm-count families. The bc_frag passed
// down must bitcast the 128-bit memory operand (loaded as v2i64) to the
// same element type as SrcVT, so each row pairs a matching SrcVT/bc_frag.
5373 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5374 string OpcodeStr, SDNode OpNode,
5376 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
5377 bc_v4i32, avx512vl_i32_info, HasAVX512>;
5378 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
5379 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5380 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
// bc_v8i16, not bc_v2i64: the word variant must bitcast the loaded
// vector to v8i16 to match SrcVT, consistent with the D/Q rows and the
// register form's (SrcVT VR128X:$src2) cast.
5381 bc_v8i16, avx512vl_i16_info, HasBWI>;
// Immediate shifts at all three vector widths, combining the reg/mem forms
// (avx512_shift_rmi) with the broadcast-memory form (avx512_shift_rmbi).
5384 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5385 string OpcodeStr, SDNode OpNode,
5386 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5387 let Predicates = [HasAVX512] in
5388 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5390 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5391 VTInfo.info512>, EVEX_V512;
5392 let Predicates = [HasAVX512, HasVLX] in {
5393 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5395 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5396 VTInfo.info256>, EVEX_V256;
5397 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5398 itins, VTInfo.info128>,
5399 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5400 VTInfo.info128>, EVEX_V128;
// Word-element immediate shifts (BWI). No broadcast form exists for word
// elements, so only avx512_shift_rmi is instantiated.
5404 multiclass avx512_shift_rmi_w<bits<8> opcw,
5405 Format ImmFormR, Format ImmFormM,
5406 string OpcodeStr, SDNode OpNode,
5408 let Predicates = [HasBWI] in
5409 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5410 itins, v32i16_info>, EVEX_V512, VEX_WIG;
5411 let Predicates = [HasVLX, HasBWI] in {
5412 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5413 itins, v16i16x_info>, EVEX_V256, VEX_WIG;
5414 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5415 itins, v8i16x_info>, EVEX_V128, VEX_WIG;
// Dword/qword immediate shift pairs; the qword variant carries VEX.W.
5419 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5420 Format ImmFormR, Format ImmFormM,
5421 string OpcodeStr, SDNode OpNode, OpndItins itins> {
5422 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5423 itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5424 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5425 itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Immediate shifts and rotates. These share opcodes (0x71/0x72/0x73) and are
// distinguished by the ModRM.reg field encoded via the MRM0-MRM6 formats.
5428 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5430 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5431 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5433 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5435 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5436 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5438 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5440 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5441 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5443 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5444 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5445 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5446 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
// Shift-by-xmm-count forms (count taken from a 128-bit register/memory).
5448 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
5449 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
5450 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
5452 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Without VLX there is no 128/256-bit VPSRAQ encoding, so widen the source
// into a ZMM register (INSERT_SUBREG into IMPLICIT_DEF), run the 512-bit
// instruction, and extract the original-width subregister back out.
5453 let Predicates = [HasAVX512, NoVLX] in {
5454 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5455 (EXTRACT_SUBREG (v8i64
5457 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5458 VR128X:$src2)), sub_ymm)>;
5460 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5461 (EXTRACT_SUBREG (v8i64
5463 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5464 VR128X:$src2)), sub_xmm)>;
// Same widening trick for the immediate-count forms.
5466 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5467 (EXTRACT_SUBREG (v8i64
5469 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5470 imm:$src2)), sub_ymm)>;
5472 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5473 (EXTRACT_SUBREG (v8i64
5475 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5476 imm:$src2)), sub_xmm)>;
5479 //===-------------------------------------------------------------------===//
5480 // Variable Bit Shifts
5481 //===-------------------------------------------------------------------===//
// Variable (per-element) shift: reg/reg and reg/mem forms with write-mask
// support. The shift count is a full vector, one count per element.
5482 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5483 OpndItins itins, X86VectorVTInfo _> {
5484 let ExeDomain = _.ExeDomain in {
5485 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5486 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5487 "$src2, $src1", "$src1, $src2",
5488 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
5489 itins.rr>, AVX5128IBase, EVEX_4V,
5490 Sched<[itins.Sched]>;
5491 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5492 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5493 "$src2, $src1", "$src1, $src2",
5494 (_.VT (OpNode _.RC:$src1,
5495 (_.VT (bitconvert (_.LdFrag addr:$src2))))),
5496 itins.rm>, AVX5128IBase, EVEX_4V,
5497 EVEX_CD8<_.EltSize, CD8VF>,
5498 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Variable-shift broadcast-memory form (EVEX.b): the count vector is a
// single scalar broadcast from memory.
5502 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5503 OpndItins itins, X86VectorVTInfo _> {
5504 let ExeDomain = _.ExeDomain in
5505 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5506 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5507 "${src2}"##_.BroadcastStr##", $src1",
5508 "$src1, ${src2}"##_.BroadcastStr,
5509 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5510 (_.ScalarLdFrag addr:$src2))))),
5511 itins.rm>, AVX5128IBase, EVEX_B,
5512 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5513 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All three widths of the variable-shift forms (512-bit baseline, 256/128
// gated on VLX), each including the broadcast variant.
5516 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5517 OpndItins itins, AVX512VLVectorVTInfo _> {
5518 let Predicates = [HasAVX512] in
5519 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5520 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5522 let Predicates = [HasAVX512, HasVLX] in {
5523 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5524 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5525 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5526 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
// Dword and qword variants share one opcode; VEX.W selects the qword form.
5530 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5531 SDNode OpNode, OpndItins itins> {
5532 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
5534 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
5535 avx512vl_i64_info>, VEX_W;
5538 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens 128/256-bit variable shifts into the 512-bit instruction (named by
// OpcodeStr # "Zrr") when the narrow encodings are unavailable.
5539 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5540 SDNode OpNode, list<Predicate> p> {
5541 let Predicates = p in {
5542 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5543 (_.info256.VT _.info256.RC:$src2))),
5545 (!cast<Instruction>(OpcodeStr#"Zrr")
5546 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5547 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5550 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5551 (_.info128.VT _.info128.RC:$src2))),
5553 (!cast<Instruction>(OpcodeStr#"Zrr")
5554 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5555 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element variable shifts (BWI only); no broadcast form for words.
5559 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5560 SDNode OpNode, OpndItins itins> {
5561 let Predicates = [HasBWI] in
5562 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
5564 let Predicates = [HasVLX, HasBWI] in {
5566 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
5568 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
// Variable shift and variable rotate instantiations.
5573 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
5574 avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;
5576 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
5577 avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;
5579 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
5580 avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;
5582 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
5583 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
// Narrow widths that lack a native encoding without VLX are widened to 512.
5585 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5586 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5587 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5588 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5590 // Special handling for the VPSRAV intrinsics.
// Maps the X86vsrav DAG node (and its masked/zero-masked vselect wrappers)
// onto the corresponding VPSRAV* instruction variants: rr/rm and the
// k-masked (rrk/rmk) and zero-masked (rrkz/rmkz) forms.
5591 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
5592 list<Predicate> p> {
5593 let Predicates = p in {
5594 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
5595 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
5597 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
5598 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
5599 _.RC:$src1, addr:$src2)>;
// Merge-masking: vselect with a register passthru ($src0) -> rrk/rmk.
5600 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5601 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
5602 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
5603 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
5604 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5605 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5607 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
5608 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
// Zero-masking: vselect with an all-zeros passthru -> rrkz/rmkz.
5609 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5610 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
5611 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
5612 _.RC:$src1, _.RC:$src2)>;
5613 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5614 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5616 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
5617 _.RC:$src1, addr:$src2)>;
// Extends the above with broadcast-memory variants (rmb/rmbk/rmbkz) for
// element types that support embedded broadcast (dword/qword, not word).
5621 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
5622 list<Predicate> p> :
5623 avx512_var_shift_int_lowering<InstrStr, _, p> {
5624 let Predicates = p in {
5625 def : Pat<(_.VT (X86vsrav _.RC:$src1,
5626 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
5627 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
5628 _.RC:$src1, addr:$src2)>;
5629 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5630 (X86vsrav _.RC:$src1,
5631 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5633 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
5634 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5635 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5636 (X86vsrav _.RC:$src1,
5637 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5639 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
5640 _.RC:$src1, addr:$src2)>;
// Word forms have no broadcast variant; dword/qword forms do.
5644 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
5645 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
5646 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
5647 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
5648 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
5649 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
5650 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
5651 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
5652 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5655 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widening scheme as the NoVLX shift patterns above: insert the narrow
// operands into ZMM registers, run the 512-bit rotate, extract the subreg.
5656 let Predicates = [HasAVX512, NoVLX] in {
5657 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5658 (EXTRACT_SUBREG (v8i64
5660 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5661 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5663 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5664 (EXTRACT_SUBREG (v8i64
5666 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5667 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5670 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5671 (EXTRACT_SUBREG (v16i32
5673 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5674 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5676 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5677 (EXTRACT_SUBREG (v16i32
5679 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5680 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-count rotate-left forms.
5683 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
5684 (EXTRACT_SUBREG (v8i64
5686 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5687 imm:$src2)), sub_xmm)>;
5688 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
5689 (EXTRACT_SUBREG (v8i64
5691 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5692 imm:$src2)), sub_ymm)>;
5694 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
5695 (EXTRACT_SUBREG (v16i32
5697 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5698 imm:$src2)), sub_xmm)>;
5699 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
5700 (EXTRACT_SUBREG (v16i32
5702 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5703 imm:$src2)), sub_ymm)>;
5706 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the rotate-left block above for rotate-right.
5707 let Predicates = [HasAVX512, NoVLX] in {
5708 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5709 (EXTRACT_SUBREG (v8i64
5711 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5712 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5714 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5715 (EXTRACT_SUBREG (v8i64
5717 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5718 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5721 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5722 (EXTRACT_SUBREG (v16i32
5724 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5725 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5727 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5728 (EXTRACT_SUBREG (v16i32
5730 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5731 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-count rotate-right forms.
5734 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
5735 (EXTRACT_SUBREG (v8i64
5737 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5738 imm:$src2)), sub_xmm)>;
5739 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
5740 (EXTRACT_SUBREG (v8i64
5742 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5743 imm:$src2)), sub_ymm)>;
5745 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
5746 (EXTRACT_SUBREG (v16i32
5748 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5749 imm:$src2)), sub_xmm)>;
5750 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
5751 (EXTRACT_SUBREG (v16i32
5753 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5754 imm:$src2)), sub_ymm)>;
5757 //===-------------------------------------------------------------------===//
5758 // 1-src variable permutation VPERMW/D/Q
5759 //===-------------------------------------------------------------------===//
// Variable dword/qword permute: reuses the variable-shift multiclasses for
// the rr/rm/rmb forms. Only 512/256-bit widths exist (no 128-bit VPERMD/Q).
5760 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5761 OpndItins itins, AVX512VLVectorVTInfo _> {
5762 let Predicates = [HasAVX512] in
5763 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5764 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5766 let Predicates = [HasAVX512, HasVLX] in
5767 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5768 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
// Immediate-control qword permute (VPERMQ/VPERMPD imm forms), built on the
// immediate-shift multiclasses. Again, 512/256-bit only.
5771 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5772 string OpcodeStr, SDNode OpNode,
5773 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5774 let Predicates = [HasAVX512] in
5775 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5776 itins, VTInfo.info512>,
5777 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5778 itins, VTInfo.info512>, EVEX_V512;
5779 let Predicates = [HasAVX512, HasVLX] in
5780 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5781 itins, VTInfo.info256>,
5782 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5783 itins, VTInfo.info256>, EVEX_V256;
// Byte/word variable permute (VPERMB/VPERMW), gated on a per-family
// predicate (VBMI for bytes, BWI for words); all three widths exist.
5786 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5787 Predicate prd, SDNode OpNode,
5788 OpndItins itins, AVX512VLVectorVTInfo _> {
5789 let Predicates = [prd] in
5790 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5792 let Predicates = [HasVLX, prd] in {
5793 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5795 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5800 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5801 AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
5802 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
5803 AVX2_PERMV_I, avx512vl_i8_info>;
5805 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5806 AVX2_PERMV_I, avx512vl_i32_info>;
5807 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5808 AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
5809 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5810 AVX2_PERMV_F, avx512vl_f32_info>;
5811 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5812 AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;
5814 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5815 X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
5816 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5817 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5818 X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
5819 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5820 //===----------------------------------------------------------------------===//
5821 // AVX-512 - VPERMIL
5822 //===----------------------------------------------------------------------===//
// VPERMILPS/PD variable-control form: the permute control comes from a
// second vector (register, memory, or broadcast memory) whose type is
// described by the separate Ctrl VTInfo.
5824 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5825 OpndItins itins, X86VectorVTInfo _,
5826 X86VectorVTInfo Ctrl> {
5827 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5828 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5829 "$src2, $src1", "$src1, $src2",
5830 (_.VT (OpNode _.RC:$src1,
5831 (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
5832 T8PD, EVEX_4V, Sched<[itins.Sched]>;
5833 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5834 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5835 "$src2, $src1", "$src1, $src2",
5838 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
5839 itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5840 Sched<[itins.Sched.Folded, ReadAfterLd]>;
5841 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5842 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5843 "${src2}"##_.BroadcastStr##", $src1",
5844 "$src1, ${src2}"##_.BroadcastStr,
5847 (Ctrl.VT (X86VBroadcast
5848 (Ctrl.ScalarLdFrag addr:$src2))))),
5849 itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
5850 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All widths of the variable-control form, with X86VPermilpv as the node.
5853 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
5854 OpndItins itins, AVX512VLVectorVTInfo _,
5855 AVX512VLVectorVTInfo Ctrl> {
5856 let Predicates = [HasAVX512] in {
5857 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5858 _.info512, Ctrl.info512>, EVEX_V512;
5860 let Predicates = [HasAVX512, HasVLX] in {
5861 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5862 _.info128, Ctrl.info128>, EVEX_V128;
5863 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5864 _.info256, Ctrl.info256>, EVEX_V256;
// Combines the variable-control (OpcVar) and immediate-control (OpcImm,
// via the immediate-shift multiclasses) VPERMIL forms under one NAME.
5868 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
5869 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5870 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
5871 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
5872 X86VPermilpi, AVX_VPERMILV, _>,
5873 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
5876 let ExeDomain = SSEPackedSingle in
5877 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
5879 let ExeDomain = SSEPackedDouble in
5880 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
5881 avx512vl_i64_info>, VEX_W;
5883 //===----------------------------------------------------------------------===//
5884 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5885 //===----------------------------------------------------------------------===//
// VPSHUFD/VPSHUFHW/VPSHUFLW reuse the immediate-shift multiclasses since
// they share the (vector, u8imm) operand shape.
5887 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
5888 X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
5889 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
5890 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
5891 X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
5892 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
5893 X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
// VPSHUFB: byte shuffle at all widths, built on the variable-shift rr/rm
// multiclass (same two-vector operand shape). Requires BWI.
5895 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5897 let Predicates = [HasBWI] in
5898 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
5900 let Predicates = [HasVLX, HasBWI] in {
5901 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
5902 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
5906 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
5908 //===----------------------------------------------------------------------===//
5909 // Move Low to High and High to Low packed FP Instructions
5910 //===----------------------------------------------------------------------===//
// EVEX-encoded MOVLHPS/MOVHLPS: move low/high 64 bits between the packed
// single-precision halves of two XMM registers.
5911 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
5912 (ins VR128X:$src1, VR128X:$src2),
5913 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5914 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
5915 IIC_SSE_MOV_LH>, EVEX_4V;
5916 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
5917 (ins VR128X:$src1, VR128X:$src2),
5918 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5919 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
5920 IIC_SSE_MOV_LH>, EVEX_4V;
5922 //===----------------------------------------------------------------------===//
5923 // VMOVHPS/PD VMOVLPS Instructions
5924 // All patterns were taken from the SSE implementation.
5925 //===----------------------------------------------------------------------===//
// Load form of VMOVHPS/VMOVHPD/VMOVLPS/VMOVLPD: merges a 64-bit memory
// operand into the low or high half of an XMM register (which half depends
// on the OpNode passed in).
5926 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
5927 X86VectorVTInfo _> {
5928 let ExeDomain = _.ExeDomain in
5929 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
5930 (ins _.RC:$src1, f64mem:$src2),
5931 !strconcat(OpcodeStr,
5932 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5936 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
5937 IIC_SSE_MOV_LH>, EVEX_4V;
5940 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
5941 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5942 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
5943 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5944 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
5945 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5946 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
5947 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Extra DAG patterns that fold other load shapes into the VMOVH/LP loads.
5949 let Predicates = [HasAVX512] in {
5951 def : Pat<(X86Movlhps VR128X:$src1,
5952 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
5953 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5954 def : Pat<(X86Movlhps VR128X:$src1,
5955 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
5956 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5958 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5959 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
5960 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
5962 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
5963 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
5965 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
5966 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
5967 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
5968 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
5969 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: write the high (VMOVHP*) or low (VMOVLP*) 64 bits of an XMM
// register to memory. The high-half stores are modeled as an unpack-high
// followed by extracting element 0.
5972 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
5973 (ins f64mem:$dst, VR128X:$src),
5974 "vmovhps\t{$src, $dst|$dst, $src}",
5975 [(store (f64 (extractelt
5976 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
5977 (bc_v2f64 (v4f32 VR128X:$src))),
5978 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5979 EVEX, EVEX_CD8<32, CD8VT2>;
5980 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
5981 (ins f64mem:$dst, VR128X:$src),
5982 "vmovhpd\t{$src, $dst|$dst, $src}",
5983 [(store (f64 (extractelt
5984 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
5985 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5986 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
5987 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
5988 (ins f64mem:$dst, VR128X:$src),
5989 "vmovlps\t{$src, $dst|$dst, $src}",
5990 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
5991 (iPTR 0))), addr:$dst)],
5993 EVEX, EVEX_CD8<32, CD8VT2>;
5994 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
5995 (ins f64mem:$dst, VR128X:$src),
5996 "vmovlpd\t{$src, $dst|$dst, $src}",
5997 [(store (f64 (extractelt (v2f64 VR128X:$src),
5998 (iPTR 0))), addr:$dst)],
6000 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Fold other DAG shapes into the store forms.
6002 let Predicates = [HasAVX512] in {
6004 def : Pat<(store (f64 (extractelt
6005 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6006 (iPTR 0))), addr:$dst),
6007 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6009 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6011 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
6013 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6015 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
6017 //===----------------------------------------------------------------------===//
6018 // FMA - Fused Multiply Operations
// FMA 213 form: dst = src2 * src1 + src3 (operand order per the 213
// numbering). $src1 is tied to $dst; reg/reg, reg/mem and broadcast forms.
6021 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6022 X86VectorVTInfo _, string Suff> {
6023 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6024 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6025 (ins _.RC:$src2, _.RC:$src3),
6026 OpcodeStr, "$src3, $src2", "$src2, $src3",
6027 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
6028 AVX512FMA3Base, Sched<[WriteFMA]>;
6030 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6031 (ins _.RC:$src2, _.MemOp:$src3),
6032 OpcodeStr, "$src3, $src2", "$src2, $src3",
6033 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6034 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6036 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6037 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6038 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6039 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6041 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
6042 NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6043 Sched<[WriteFMALd, ReadAfterLd]>;
// 213 form with an explicit static-rounding operand (EVEX.b + $rc);
// register-only, 512-bit.
6047 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6048 X86VectorVTInfo _, string Suff> {
6049 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6050 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6051 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6052 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6053 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
6054 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 213 form at all widths; only the 512-bit width gets the rounding variant.
6057 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6058 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6060 let Predicates = [HasAVX512] in {
6061 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6062 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6063 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6065 let Predicates = [HasVLX, HasAVX512] in {
6066 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6067 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6068 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6069 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// PS (float) and PD (double, VEX.W) flavors of each 213 FMA opcode.
6073 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6074 SDNode OpNodeRnd > {
6075 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6076 avx512vl_f32_info, "PS">;
6077 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6078 avx512vl_f64_info, "PD">, VEX_W;
6081 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6082 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6083 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6084 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6085 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6086 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
6089 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6090 X86VectorVTInfo _, string Suff> {
6091 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6092 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6093 (ins _.RC:$src2, _.RC:$src3),
6094 OpcodeStr, "$src3, $src2", "$src2, $src3",
6095 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6096 vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6098 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6099 (ins _.RC:$src2, _.MemOp:$src3),
6100 OpcodeStr, "$src3, $src2", "$src2, $src3",
6101 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6102 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6104 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6105 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6106 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6107 "$src2, ${src3}"##_.BroadcastStr,
6108 (_.VT (OpNode _.RC:$src2,
6109 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6110 _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6111 Sched<[WriteFMALd, ReadAfterLd]>;
// 231-form packed FMA with explicit rounding control (EVEX_RC): register-only
// "rb" variant taking an AVX512RC:$rc immediate passed to OpNode.
6115 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6116 X86VectorVTInfo _, string Suff> {
6117 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6118 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6119 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6120 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6121 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6122 NoItinerary, 1, 1, vselect, 1>,
6123 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 231-form packed FMA per-width instantiation: 512-bit (plus rounding variant)
// under HasAVX512; 256/128-bit under HasVLX.
6126 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6127 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6129 let Predicates = [HasAVX512] in {
6130 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6131 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6132 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6134 let Predicates = [HasVLX, HasAVX512] in {
6135 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6136 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6137 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6138 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// 231-form packed FMA front end: PS (f32) and PD (f64, VEX_W) flavors.
6142 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6143 SDNode OpNodeRnd > {
6144 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6145 avx512vl_f32_info, "PS">;
6146 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6147 avx512vl_f64_info, "PD">, VEX_W;
// 231-form packed FMA instruction instantiations (opcode bytes 0xB6-0xBE).
6150 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6151 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6152 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6153 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6154 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6155 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// 132-form packed FMA register/memory variants (r, m, mb; $src1 tied to $dst).
// The memory patterns are deliberately written in 312 operand order -- see the
// inline comments below.
6157 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6158 X86VectorVTInfo _, string Suff> {
6159 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6160 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6161 (ins _.RC:$src2, _.RC:$src3),
6162 OpcodeStr, "$src3, $src2", "$src2, $src3",
6163 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6164 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6166 // Pattern is 312 order so that the load is in a different place from the
6167 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6168 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6169 (ins _.RC:$src2, _.MemOp:$src3),
6170 OpcodeStr, "$src3, $src2", "$src2, $src3",
6171 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6172 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6174 // Pattern is 312 order so that the load is in a different place from the
6175 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6176 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6177 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6178 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6179 "$src2, ${src3}"##_.BroadcastStr,
6180 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6181 _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
6182 AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
// 132-form packed FMA with explicit rounding control (EVEX_RC), register-only.
6186 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6187 X86VectorVTInfo _, string Suff> {
6188 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6189 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6190 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6191 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6192 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6193 NoItinerary, 1, 1, vselect, 1>,
6194 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 132-form packed FMA per-width instantiation: 512-bit (plus rounding variant)
// under HasAVX512; 256/128-bit under HasVLX.
6197 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6198 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6200 let Predicates = [HasAVX512] in {
6201 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6202 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6203 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6205 let Predicates = [HasVLX, HasAVX512] in {
6206 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6207 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6208 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6209 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// 132-form packed FMA front end: PS (f32) and PD (f64, VEX_W) flavors.
6213 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6214 SDNode OpNodeRnd > {
6215 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6216 avx512vl_f32_info, "PS">;
6217 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6218 avx512vl_f64_info, "PD">, VEX_W;
// 132-form packed FMA instruction instantiations (opcode bytes 0x96-0x9E).
6221 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6222 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6223 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6224 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6225 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6226 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA skeleton shared by the 213/231/132 forms. The caller supplies the
// match patterns as dags: RHS_VEC_r/m/rb for the intrinsic (vector) forms and
// RHS_r/m for the isCodeGenOnly scalar-register forms. MaskOnlyReg suppresses
// the register-form pattern (the caller passes 1 when the pattern would
// duplicate another form's -- see use sites).
6229 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6230 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6231 dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
6232 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6233 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6234 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6235 "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6236 AVX512FMA3Base, Sched<[WriteFMA]>;
6238 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6239 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6240 "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
6241 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6243 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6244 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6245 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6246 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
6249 let isCodeGenOnly = 1, isCommutable = 1 in {
6250 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6251 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6252 !strconcat(OpcodeStr,
6253 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6254 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
6255 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6256 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6257 !strconcat(OpcodeStr,
6258 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6259 [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
6260 }// isCodeGenOnly = 1
6261 }// Constraints = "$src1 = $dst"
// Instantiates the 213, 231 and 132 scalar FMA forms for one element type,
// feeding avx512_fma3s_common the concrete match patterns for each form.
6264 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6265 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6266 SDNode OpNodeRnds1, SDNode OpNodes3,
6267 SDNode OpNodeRnds3, X86VectorVTInfo _,
6269 let ExeDomain = _.ExeDomain in {
6270 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6271 // Operands for intrinsic are in 123 order to preserve passthrough
6273 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6274 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6275 _.ScalarIntMemCPat:$src3)),
6276 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6278 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6280 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6281 (_.ScalarLdFrag addr:$src3)))), 0>;
6283 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6284 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6285 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6287 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6289 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6291 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6292 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
6294 // One pattern is 312 order so that the load is in a different place from the
6295 // 213 and 231 patterns; this helps tablegen's duplicate pattern detection.
6296 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6298 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6301 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6303 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6304 _.FRC:$src1, _.FRC:$src2))), 1>;
// Scalar FMA front end: instantiates avx512_fma3s_all for the SS (32-bit
// element, CD8VT1) and SD (64-bit element, VEX_W) variants under HasAVX512.
6308 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6309 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6310 SDNode OpNodeRnds1, SDNode OpNodes3,
6311 SDNode OpNodeRnds3> {
6312 let Predicates = [HasAVX512] in {
6313 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6314 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6316 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6317 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6318 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6320 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar FMA instruction instantiations; opcode triples are (213, 231, 132).
6324 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6325 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6326 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6327 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6328 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6329 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6330 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6331 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
6333 //===----------------------------------------------------------------------===//
6334 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6335 //===----------------------------------------------------------------------===//
// VPMADD52 (IFMA) register/memory/broadcast variants; $src1 (the addend) is
// tied to $dst.
6336 let Constraints = "$src1 = $dst" in {
6337 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6338 OpndItins itins, X86VectorVTInfo _> {
6339 // NOTE: The SDNode have the multiply operands first with the add last.
6340 // This enables commuted load patterns to be autogenerated by tablegen.
6341 let ExeDomain = _.ExeDomain in {
6342 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6343 (ins _.RC:$src2, _.RC:$src3),
6344 OpcodeStr, "$src3, $src2", "$src2, $src3",
6345 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6346 AVX512FMA3Base, Sched<[itins.Sched]>;
6348 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6349 (ins _.RC:$src2, _.MemOp:$src3),
6350 OpcodeStr, "$src3, $src2", "$src2, $src3",
6351 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6352 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6354 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6355 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6356 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6357 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6359 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6360 _.RC:$src1), itins.rm>,
6361 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6364 } // Constraints = "$src1 = $dst"
// VPMADD52 per-width instantiation: 512-bit under HasIFMA; 256/128-bit under
// HasVLX + HasIFMA.
6366 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6367 OpndItins itins, AVX512VLVectorVTInfo _> {
6368 let Predicates = [HasIFMA] in {
6369 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
6370 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6372 let Predicates = [HasVLX, HasIFMA] in {
6373 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
6374 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6375 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
6376 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// IFMA instruction instantiations: low (0xB4) and high (0xB5) 52-bit halves.
6380 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6381 SSE_PMADD, avx512vl_i64_info>, VEX_W;
6382 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6383 SSE_PMADD, avx512vl_i64_info>, VEX_W;
6385 //===----------------------------------------------------------------------===//
6386 // AVX-512 Scalar convert from signed integer to float/double
6387 //===----------------------------------------------------------------------===//
// Scalar int-to-fp conversion: plain rr/rm forms (no patterns, FRC operands)
// plus isCodeGenOnly rr_Int/rm_Int forms that match OpNode with
// FROUND_CURRENT on vector (RC) operands.
6389 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6390 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6391 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6392 let hasSideEffects = 0 in {
6393 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6394 (ins DstVT.FRC:$src1, SrcRC:$src),
6395 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6396 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
6398 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6399 (ins DstVT.FRC:$src1, x86memop:$src),
6400 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6401 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6402 } // hasSideEffects = 0
6403 let isCodeGenOnly = 1 in {
6404 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6405 (ins DstVT.RC:$src1, SrcRC:$src2),
6406 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6407 [(set DstVT.RC:$dst,
6408 (OpNode (DstVT.VT DstVT.RC:$src1),
6410 (i32 FROUND_CURRENT)))], itins.rr>,
6411 EVEX_4V, Sched<[itins.Sched]>;
6413 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6414 (ins DstVT.RC:$src1, x86memop:$src2),
6415 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6416 [(set DstVT.RC:$dst,
6417 (OpNode (DstVT.VT DstVT.RC:$src1),
6418 (ld_frag addr:$src2),
6419 (i32 FROUND_CURRENT)))], itins.rm>,
6420 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6421 }//isCodeGenOnly = 1
// Scalar int-to-fp conversion with explicit rounding control ($rc, EVEX_RC).
6424 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6425 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
6426 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6427 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6429 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6430 [(set DstVT.RC:$dst,
6431 (OpNode (DstVT.VT DstVT.RC:$src1),
6433 (i32 imm:$rc)))], itins.rr>,
6434 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Combines the rounding-control and plain int-to-fp conversion variants under
// one name, adding VEX_LIG.
6437 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6438 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6439 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6440 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6441 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6442 ld_frag, asm>, VEX_LIG;
// Signed/unsigned int -> float/double conversion instructions, assembler
// aliases for the ambiguous 32-bit memory forms, and selection patterns
// mapping sint_to_fp / uint_to_fp onto them (with IMPLICIT_DEF for the
// pass-through operand).
6445 let Predicates = [HasAVX512] in {
6446 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
6447 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6448 XS, EVEX_CD8<32, CD8VT1>;
6449 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
6450 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6451 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6452 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
6453 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6454 XD, EVEX_CD8<32, CD8VT1>;
6455 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
6456 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6457 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6459 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6460 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6461 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6462 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6464 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6465 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6466 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
6467 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6468 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6469 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6470 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
6471 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6473 def : Pat<(f32 (sint_to_fp GR32:$src)),
6474 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6475 def : Pat<(f32 (sint_to_fp GR64:$src)),
6476 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6477 def : Pat<(f64 (sint_to_fp GR32:$src)),
6478 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6479 def : Pat<(f64 (sint_to_fp GR64:$src)),
6480 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6482 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
6483 v4f32x_info, i32mem, loadi32,
6484 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
6485 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
6486 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6487 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6488 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
6489 i32mem, loadi32, "cvtusi2sd{l}">,
6490 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
6491 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
6492 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6493 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6495 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6496 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6497 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6498 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6500 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6501 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6502 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6503 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6504 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6505 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6506 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6507 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6509 def : Pat<(f32 (uint_to_fp GR32:$src)),
6510 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6511 def : Pat<(f32 (uint_to_fp GR64:$src)),
6512 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6513 def : Pat<(f64 (uint_to_fp GR32:$src)),
6514 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6515 def : Pat<(f64 (uint_to_fp GR64:$src)),
6516 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6519 //===----------------------------------------------------------------------===//
6520 // AVX-512 Scalar convert from float/double to integer
6521 //===----------------------------------------------------------------------===//
// Scalar fp-to-int conversion with rounding: rr_Int (FROUND_CURRENT),
// rrb_Int (explicit $rc, EVEX_RC) and rm_Int (memory, FROUND_CURRENT) forms.
6523 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6524 X86VectorVTInfo DstVT, SDNode OpNode,
6525 OpndItins itins, string asm> {
6526 let Predicates = [HasAVX512] in {
6527 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
6528 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6529 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6530 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
6531 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
6532 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6533 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6534 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6535 Sched<[itins.Sched]>;
6536 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
6537 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6538 [(set DstVT.RC:$dst, (OpNode
6539 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
6540 (i32 FROUND_CURRENT)))], itins.rm>,
6541 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6542 } // Predicates = [HasAVX512]
6545 // Convert float/double to signed/unsigned int 32/64
// (opcode 0x2D = signed, 0x79 = unsigned; VEX_W selects the 64-bit result).
6546 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
6547 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si">,
6548 XS, EVEX_CD8<32, CD8VT1>;
6549 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
6550 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si">,
6551 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6552 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
6553 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi">,
6554 XS, EVEX_CD8<32, CD8VT1>;
6555 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
6556 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi">,
6557 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6558 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
6559 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6560 XD, EVEX_CD8<64, CD8VT1>;
6561 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
6562 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6563 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6564 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
6565 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6566 XD, EVEX_CD8<64, CD8VT1>;
6567 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
6568 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6569 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6571 // The SSE version of these instructions are disabled for AVX512.
6572 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
6573 let Predicates = [HasAVX512] in {
6574 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
6575 (VCVTSS2SIZrr_Int VR128X:$src)>;
6576 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
6577 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
6578 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
6579 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
6580 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
6581 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
6582 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
6583 (VCVTSD2SIZrr_Int VR128X:$src)>;
6584 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
6585 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
6586 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
6587 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
6588 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
6589 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
// Map the SSE/SSE2/AVX-512 cvtsi2ss/cvtsi2sd intrinsics onto the AVX-512
// *_Int instruction forms (register and folded-load variants).
6592 let Predicates = [HasAVX512] in {
6593 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6594 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6595 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6596 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6597 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6598 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6599 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6600 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6601 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6602 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6603 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6604 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6605 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6606 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6607 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6608 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6609 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6610 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6611 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6612 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6613 } // Predicates = [HasAVX512]
6615 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6616 // which produce unnecessary vmovs{s,d} instructions
// (the X86Movss/X86Movsd merge is folded into the *_Int conversion's
// pass-through operand, eliminating the separate move).
6617 let Predicates = [HasAVX512] in {
6618 def : Pat<(v4f32 (X86Movss
6619 (v4f32 VR128X:$dst),
6620 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6621 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6623 def : Pat<(v4f32 (X86Movss
6624 (v4f32 VR128X:$dst),
6625 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6626 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6628 def : Pat<(v2f64 (X86Movsd
6629 (v2f64 VR128X:$dst),
6630 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6631 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6633 def : Pat<(v2f64 (X86Movsd
6634 (v2f64 VR128X:$dst),
6635 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6636 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6637 } // Predicates = [HasAVX512]
6639 // Convert float/double to signed/unsigned int 32/64 with truncation
// Emits: rr/rrb/rm forms on scalar FP registers (rrb is the pattern-less
// {sae} form), InstAliases carrying the size suffix (aliasStr), and
// isCodeGenOnly *_Int forms on vector registers matching OpNodeRnd.
6640 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6641 X86VectorVTInfo _DstRC, SDNode OpNode,
6642 SDNode OpNodeRnd, OpndItins itins, string aliasStr>{
6643 let Predicates = [HasAVX512] in {
6644 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6645 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6646 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6647 EVEX, Sched<[itins.Sched]>;
6648 let hasSideEffects = 0 in
6649 def rrb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6650 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6651 [], itins.rr>, EVEX, EVEX_B, Sched<[itins.Sched]>;
6652 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
6653 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6654 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6655 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6657 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6658 (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6659 def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
6660 (!cast<Instruction>(NAME # "rrb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6661 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6662 (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
6663 _SrcRC.ScalarMemOp:$src), 0>;
6665 let isCodeGenOnly = 1 in {
6666 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6667 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6668 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6669 (i32 FROUND_CURRENT)))], itins.rr>,
6670 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6671 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6672 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6673 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6674 (i32 FROUND_NO_EXC)))], itins.rr>,
6675 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
6676 let mayLoad = 1, hasSideEffects = 0 in
6677 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6678 (ins _SrcRC.IntScalarMemOp:$src),
6679 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6680 [], itins.rm>, EVEX, VEX_LIG,
6681 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6682 } // isCodeGenOnly = 1
// Truncating conversion instantiations (opcode 0x2C = signed, 0x78 = unsigned;
// VEX_W selects the 64-bit result; the "{l}"/"{q}" string is the alias suffix).
6687 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
6688 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
6689 XS, EVEX_CD8<32, CD8VT1>;
6690 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
6691 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
6692 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
6693 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
6694 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
6695 XD, EVEX_CD8<64, CD8VT1>;
6696 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
6697 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
6698 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6700 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
6701 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
6702 XS, EVEX_CD8<32, CD8VT1>;
6703 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
6704 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
6705 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
6706 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
6707 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
6708 XD, EVEX_CD8<64, CD8VT1>;
6709 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
6710 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
6711 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Map the SSE/SSE2 truncating-conversion intrinsics onto the AVX-512 *_Int
// forms (register and folded-load variants).
6712 let Predicates = [HasAVX512] in {
6713 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
6714 (VCVTTSS2SIZrr_Int VR128X:$src)>;
6715 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6716 (VCVTTSS2SIZrm_Int ssmem:$src)>;
6717 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
6718 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
6719 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6720 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
6721 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
6722 (VCVTTSD2SIZrr_Int VR128X:$src)>;
6723 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6724 (VCVTTSD2SIZrm_Int sdmem:$src)>;
6725 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
6726 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
6727 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6728 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
6731 //===----------------------------------------------------------------------===//
6732 // AVX-512 Convert from float to double and back
6733 //===----------------------------------------------------------------------===//
// Scalar fp-to-fp conversion (e.g. ss<->sd): maskable rr_Int/rm_Int forms
// matching OpNode with FROUND_CURRENT, plus isCodeGenOnly pattern-less rr/rm
// forms on scalar FP registers.
6735 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6736 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
6737 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6738 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6739 "$src2, $src1", "$src1, $src2",
6740 (_.VT (OpNode (_.VT _.RC:$src1),
6741 (_Src.VT _Src.RC:$src2),
6742 (i32 FROUND_CURRENT))), itins.rr>,
6743 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6744 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6745 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
6746 "$src2, $src1", "$src1, $src2",
6747 (_.VT (OpNode (_.VT _.RC:$src1),
6748 (_Src.VT _Src.ScalarIntMemCPat:$src2),
6749 (i32 FROUND_CURRENT))), itins.rm>,
6751 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6753 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6754 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6755 (ins _.FRC:$src1, _Src.FRC:$src2),
6756 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6757 itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6759 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6760 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
6761 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6762 itins.rm>, EVEX_4V, VEX_LIG,
6763 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6767 // Scalar Conversion with SAE - suppress all exceptions
6768 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6769 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6770 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6771 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6772 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
6773 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6774 (_Src.VT _Src.RC:$src2),
6775 (i32 FROUND_NO_EXC))), itins.rr>,
6776 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
6779 // Scalar Conversion with rounding control (RC)
6780 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6781 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6782 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6783 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6784 "$rc, $src2, $src1", "$src1, $src2, $rc",
6785 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6786 (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
6788 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
6791 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
6792 SDNode OpNodeRnd, OpndItins itins,
6793 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6794 let Predicates = [HasAVX512] in {
6795 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6796 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
6797 OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
6801 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
6802 SDNode OpNodeRnd, OpndItins itins,
6803 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6804 let Predicates = [HasAVX512] in {
6805 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6806 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6807 EVEX_CD8<32, CD8VT1>, XS;
// Scalar double<->single conversions. Both directions share opcode 0x5A;
// the actual operation is selected by the mandatory prefix applied inside
// the multiclasses (XD for sd2ss, XS for ss2sd).
6810 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
6811 X86froundRnd, SSE_CVT_SD2SS, f64x_info,
6812 f32x_info>, NotMemoryFoldable;
6813 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6814 X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
6815 f64x_info>, NotMemoryFoldable;
// Lower plain (non-intrinsic) scalar fpextend/fpround to the EVEX
// conversions. The instruction's first source operand is unused by these
// patterns, so an IMPLICIT_DEF is supplied for it.
6817 def : Pat<(f64 (fpextend FR32X:$src)),
6818 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
6819 Requires<[HasAVX512]>;
6820 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6821 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6822 Requires<[HasAVX512]>;
// For an any-extending f32 load: fold the load into the conversion only
// when optimizing for size; when optimizing for speed, load with VMOVSSZrm
// first and convert register-to-register (NOTE(review): presumably to
// avoid a dependency introduced by the folded-load form - confirm).
6824 def : Pat<(f64 (extloadf32 addr:$src)),
6825 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6826 Requires<[HasAVX512, OptForSize]>;
6828 def : Pat<(f64 (extloadf32 addr:$src)),
6829 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
6830 Requires<[HasAVX512, OptForSpeed]>;
6832 def : Pat<(f32 (fpround FR64X:$src)),
6833 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
6834 Requires<[HasAVX512]>;
// When a converted low scalar (element 0 of $src) is blended back into a
// vector via X86Movss/X86Movsd, select the _Int VR128X forms, which write
// only the low element of $dst.
6836 def : Pat<(v4f32 (X86Movss
6837 (v4f32 VR128X:$dst),
6838 (v4f32 (scalar_to_vector
6839 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6840 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
6841 Requires<[HasAVX512]>;
6843 def : Pat<(v2f64 (X86Movsd
6844 (v2f64 VR128X:$dst),
6845 (v2f64 (scalar_to_vector
6846 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6847 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
6848 Requires<[HasAVX512]>;
6850 //===----------------------------------------------------------------------===//
6851 // AVX-512 Vector convert from signed/unsigned integer to float/double
6852 // and from float/double to signed/unsigned integer
6853 //===----------------------------------------------------------------------===//
6855 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6856 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
6857 string Broadcast = _.BroadcastStr,
6858 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
6860 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6861 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
6862 (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
6863 EVEX, Sched<[itins.Sched]>;
6865 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6866 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
6867 (_.VT (OpNode (_Src.VT
6868 (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
6869 EVEX, Sched<[itins.Sched.Folded]>;
6871 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6872 (ins _Src.ScalarMemOp:$src), OpcodeStr,
6873 "${src}"##Broadcast, "${src}"##Broadcast,
6874 (_.VT (OpNode (_Src.VT
6875 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
6876 )), itins.rm>, EVEX, EVEX_B,
6877 Sched<[itins.Sched.Folded]>;
6879 // Conversion with SAE - suppress all exceptions
6880 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6881 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6883 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6884 (ins _Src.RC:$src), OpcodeStr,
6885 "{sae}, $src", "$src, {sae}",
6886 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
6887 (i32 FROUND_NO_EXC))), itins.rr>,
6888 EVEX, EVEX_B, Sched<[itins.Sched]>;
6891 // Conversion with rounding control (RC)
6892 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6893 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6895 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6896 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6897 "$rc, $src", "$src, $rc",
6898 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
6899 itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6902 // Extend Float to Double
6903 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
6905 let Predicates = [HasAVX512] in {
6906 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
6908 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
6909 X86vfpextRnd, itins>, EVEX_V512;
6911 let Predicates = [HasVLX] in {
6912 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
6913 X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
6914 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
6919 // Truncate Double to Float
6920 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
6921 let Predicates = [HasAVX512] in {
6922 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
6923 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
6924 X86vfproundRnd, itins>, EVEX_V512;
6926 let Predicates = [HasVLX] in {
6927 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
6928 X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
6929 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
6930 itins, "{1to4}", "{y}">, EVEX_V256;
6932 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6933 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6934 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6935 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6936 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6937 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6938 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6939 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
// Packed float<->double conversions. Both directions share opcode 0x5A and
// are distinguished by the PD/PS prefix and VEX_W.
6943 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
6944 VEX_W, PD, EVEX_CD8<64, CD8VF>;
6945 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
6946 PS, EVEX_CD8<32, CD8VH>;
// Fold an any-extending v8f32 load straight into the 512-bit memory form.
6948 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6949 (VCVTPS2PDZrm addr:$src)>;
6951 let Predicates = [HasVLX] in {
6952 let AddedComplexity = 15 in {
6953 def : Pat<(X86vzmovl (v2f64 (bitconvert
6954 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6955 (VCVTPD2PSZ128rr VR128X:$src)>;
6956 def : Pat<(X86vzmovl (v2f64 (bitconvert
6957 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
6958 (VCVTPD2PSZ128rm addr:$src)>;
6960 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6961 (VCVTPS2PDZ128rm addr:$src)>;
6962 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6963 (VCVTPS2PDZ256rm addr:$src)>;
6966 // Convert Signed/Unsigned Doubleword to Double
6967 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
6968 SDNode OpNode128, OpndItins itins> {
6969 // No rounding in this op
6970 let Predicates = [HasAVX512] in
6971 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
6974 let Predicates = [HasVLX] in {
6975 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
6976 OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
6977 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
6982 // Convert Signed/Unsigned Doubleword to Float
6983 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
6984 SDNode OpNodeRnd, OpndItins itins> {
6985 let Predicates = [HasAVX512] in
6986 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
6988 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
6989 OpNodeRnd, itins>, EVEX_V512;
6991 let Predicates = [HasVLX] in {
6992 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
6994 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
6999 // Convert Float to Signed/Unsigned Doubleword with truncation
7000 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7001 SDNode OpNodeRnd, OpndItins itins> {
7002 let Predicates = [HasAVX512] in {
7003 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7005 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7006 OpNodeRnd, itins>, EVEX_V512;
7008 let Predicates = [HasVLX] in {
7009 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7011 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7016 // Convert Float to Signed/Unsigned Doubleword
7017 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7018 SDNode OpNodeRnd, OpndItins itins> {
7019 let Predicates = [HasAVX512] in {
7020 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7022 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7023 OpNodeRnd, itins>, EVEX_V512;
7025 let Predicates = [HasVLX] in {
7026 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7028 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7033 // Convert Double to Signed/Unsigned Doubleword with truncation
7034 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7035 SDNode OpNode128, SDNode OpNodeRnd,
7037 let Predicates = [HasAVX512] in {
7038 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7040 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7041 OpNodeRnd, itins>, EVEX_V512;
7043 let Predicates = [HasVLX] in {
7044 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7045 // memory forms of these instructions in Asm Parser. They have the same
7046 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7047 // due to the same reason.
7048 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7049 OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
7050 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7051 itins, "{1to4}", "{y}">, EVEX_V256;
7053 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7054 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7055 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7056 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7057 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7058 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7059 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7060 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7064 // Convert Double to Signed/Unsigned Doubleword
7065 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7066 SDNode OpNodeRnd, OpndItins itins> {
7067 let Predicates = [HasAVX512] in {
7068 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7070 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7071 OpNodeRnd, itins>, EVEX_V512;
7073 let Predicates = [HasVLX] in {
7074 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7075 // memory forms of these instructions in Asm Parser. They have the same
7076 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7077 // due to the same reason.
7078 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7079 itins, "{1to2}", "{x}">, EVEX_V128;
7080 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7081 itins, "{1to4}", "{y}">, EVEX_V256;
7083 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7084 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7085 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7086 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7087 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7088 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7089 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7090 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
7094 // Convert Double to Signed/Unsigned Quadword
7095 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7096 SDNode OpNodeRnd, OpndItins itins> {
7097 let Predicates = [HasDQI] in {
7098 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7100 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7101 OpNodeRnd,itins>, EVEX_V512;
7103 let Predicates = [HasDQI, HasVLX] in {
7104 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7106 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7111 // Convert Double to Signed/Unsigned Quadword with truncation
7112 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7113 SDNode OpNodeRnd, OpndItins itins> {
7114 let Predicates = [HasDQI] in {
7115 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7117 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7118 OpNodeRnd, itins>, EVEX_V512;
7120 let Predicates = [HasDQI, HasVLX] in {
7121 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7123 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7128 // Convert Signed/Unsigned Quadword to Double
7129 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7130 SDNode OpNodeRnd, OpndItins itins> {
7131 let Predicates = [HasDQI] in {
7132 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7134 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7135 OpNodeRnd, itins>, EVEX_V512;
7137 let Predicates = [HasDQI, HasVLX] in {
7138 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7140 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7145 // Convert Float to Signed/Unsigned Quadword
7146 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7147 SDNode OpNodeRnd, OpndItins itins> {
7148 let Predicates = [HasDQI] in {
7149 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7151 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7152 OpNodeRnd, itins>, EVEX_V512;
7154 let Predicates = [HasDQI, HasVLX] in {
7155 // Explicitly specified broadcast string, since we take only 2 elements
7156 // from v4f32x_info source
7157 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7158 itins, "{1to2}", "", f64mem>, EVEX_V128;
7159 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7164 // Convert Float to Signed/Unsigned Quadword with truncation
7165 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7166 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7167 let Predicates = [HasDQI] in {
7168 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7170 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7171 OpNodeRnd, itins>, EVEX_V512;
7173 let Predicates = [HasDQI, HasVLX] in {
7174 // Explicitly specified broadcast string, since we take only 2 elements
7175 // from v4f32x_info source
7176 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
7177 itins, "{1to2}", "", f64mem>, EVEX_V128;
7178 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7183 // Convert Signed/Unsigned Quadword to Float
7184 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7185 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7186 let Predicates = [HasDQI] in {
7187 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7189 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7190 OpNodeRnd, itins>, EVEX_V512;
7192 let Predicates = [HasDQI, HasVLX] in {
7193 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7194 // memory forms of these instructions in Asm Parser. They have the same
7195 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7196 // due to the same reason.
7197 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7198 itins, "{1to2}", "{x}">, EVEX_V128;
7199 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7200 itins, "{1to4}", "{y}">, EVEX_V256;
7202 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7203 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7204 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7205 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7206 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7207 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7208 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7209 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
// Instantiations of the packed int<->fp conversion multiclasses above.
// Naming: a leading T = truncating conversion, U = unsigned integers;
// DQ = packed doubleword ints, QQ = packed quadword ints (the QQ forms are
// gated on AVX512DQ by the predicates inside their multiclasses).
7213 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7214 SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
7216 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7217 X86VSintToFpRnd, SSE_CVT_I2PS>,
7218 PS, EVEX_CD8<32, CD8VF>;
7220 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
7221 X86cvttp2siRnd, SSE_CVT_PS2I>,
7222 XS, EVEX_CD8<32, CD8VF>;
7224 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
7225 X86cvttp2siRnd, SSE_CVT_PD2I>,
7226 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7228 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
7229 X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
7230 EVEX_CD8<32, CD8VF>;
7232 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
7233 X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7234 PS, VEX_W, EVEX_CD8<64, CD8VF>;
7236 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7237 X86VUintToFP, SSE_CVT_I2PD>, XS,
7238 EVEX_CD8<32, CD8VH>;
7240 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7241 X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
7242 EVEX_CD8<32, CD8VF>;
7244 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7245 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7246 EVEX_CD8<32, CD8VF>;
7248 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
7249 X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7250 VEX_W, EVEX_CD8<64, CD8VF>;
7252 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
7253 X86cvtp2UIntRnd, SSE_CVT_PS2I>,
7254 PS, EVEX_CD8<32, CD8VF>;
7256 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
7257 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7258 PS, EVEX_CD8<64, CD8VF>;
7260 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
7261 X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
7262 PD, EVEX_CD8<64, CD8VF>;
7264 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
7265 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7266 EVEX_CD8<32, CD8VH>;
7268 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
7269 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7270 PD, EVEX_CD8<64, CD8VF>;
7272 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
7273 X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7274 EVEX_CD8<32, CD8VH>;
7276 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
7277 X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
7278 PD, EVEX_CD8<64, CD8VF>;
7280 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
7281 X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7282 EVEX_CD8<32, CD8VH>;
7284 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
7285 X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
7286 PD, EVEX_CD8<64, CD8VF>;
7288 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
7289 X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7290 EVEX_CD8<32, CD8VH>;
7292 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
7293 X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7294 EVEX_CD8<64, CD8VF>;
7296 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
7297 X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7298 EVEX_CD8<64, CD8VF>;
7300 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
7301 X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7302 EVEX_CD8<64, CD8VF>;
7304 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
7305 X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7306 EVEX_CD8<64, CD8VF>;
7308 let Predicates = [HasAVX512, NoVLX] in {
7309 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
7310 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7311 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7312 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7314 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7315 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7316 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7317 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7319 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7320 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7321 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7322 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7324 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7325 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7326 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7327 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7329 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7330 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7331 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7332 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7334 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7335 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7336 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7337 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7339 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
7340 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7341 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7342 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7345 let Predicates = [HasAVX512, HasVLX] in {
7346 let AddedComplexity = 15 in {
7347 def : Pat<(X86vzmovl (v2i64 (bitconvert
7348 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
7349 (VCVTPD2DQZ128rr VR128X:$src)>;
7350 def : Pat<(X86vzmovl (v2i64 (bitconvert
7351 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
7352 (VCVTPD2DQZ128rm addr:$src)>;
7353 def : Pat<(X86vzmovl (v2i64 (bitconvert
7354 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
7355 (VCVTPD2UDQZ128rr VR128X:$src)>;
7356 def : Pat<(X86vzmovl (v2i64 (bitconvert
7357 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
7358 (VCVTTPD2DQZ128rr VR128X:$src)>;
7359 def : Pat<(X86vzmovl (v2i64 (bitconvert
7360 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
7361 (VCVTTPD2DQZ128rm addr:$src)>;
7362 def : Pat<(X86vzmovl (v2i64 (bitconvert
7363 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
7364 (VCVTTPD2UDQZ128rr VR128X:$src)>;
7367 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7368 (VCVTDQ2PDZ128rm addr:$src)>;
7369 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7370 (VCVTDQ2PDZ128rm addr:$src)>;
7372 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7373 (VCVTUDQ2PDZ128rm addr:$src)>;
7374 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7375 (VCVTUDQ2PDZ128rm addr:$src)>;
7378 let Predicates = [HasAVX512] in {
7379 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7380 (VCVTPD2PSZrm addr:$src)>;
7381 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7382 (VCVTPS2PDZrm addr:$src)>;
7385 let Predicates = [HasDQI, HasVLX] in {
7386 let AddedComplexity = 15 in {
7387 def : Pat<(X86vzmovl (v2f64 (bitconvert
7388 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
7389 (VCVTQQ2PSZ128rr VR128X:$src)>;
7390 def : Pat<(X86vzmovl (v2f64 (bitconvert
7391 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
7392 (VCVTUQQ2PSZ128rr VR128X:$src)>;
7396 let Predicates = [HasDQI, NoVLX] in {
7397 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7398 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7399 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7400 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7402 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7403 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7404 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7405 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7407 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7408 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7409 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7410 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7412 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7413 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7414 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7415 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7417 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7418 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7419 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7420 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7422 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7423 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7424 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7425 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7427 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7428 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7429 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7430 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7432 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7433 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7434 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7435 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7437 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7438 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7439 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7440 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7442 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7443 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7444 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7445 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7447 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7448 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7449 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7450 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7452 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7453 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7454 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7455 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7458 //===----------------------------------------------------------------------===//
7459 // Half precision conversion instructions
7460 //===----------------------------------------------------------------------===//
7462 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7463 X86MemOperand x86memop, PatFrag ld_frag,
7465 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7466 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
7467 (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
7468 T8PD, Sched<[itins.Sched]>;
7469 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7470 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7471 (X86cvtph2ps (_src.VT
7473 (ld_frag addr:$src)))), itins.rm>,
7474 T8PD, Sched<[itins.Sched.Folded]>;
7477 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7479 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7480 (ins _src.RC:$src), "vcvtph2ps",
7481 "{sae}, $src", "$src, {sae}",
7482 (X86cvtph2psRnd (_src.VT _src.RC:$src),
7483 (i32 FROUND_NO_EXC)), itins.rr>,
7484 T8PD, EVEX_B, Sched<[itins.Sched]>;
7487 let Predicates = [HasAVX512] in
7488 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
7490 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
7491 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7493 let Predicates = [HasVLX] in {
7494 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
7495 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256,
7496 EVEX_CD8<32, CD8VH>;
7497 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
7498 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128,
7499 EVEX_CD8<32, CD8VH>;
7501 // Pattern match vcvtph2ps of a scalar i64 load.
7502 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7503 (VCVTPH2PSZ128rm addr:$src)>;
7504 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7505 (VCVTPH2PSZ128rm addr:$src)>;
7506 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7507 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7508 (VCVTPH2PSZ128rm addr:$src)>;
7511 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7512 X86MemOperand x86memop, OpndItins itins> {
7513 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
7514 (ins _src.RC:$src1, i32u8imm:$src2),
7515 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
7516 (X86cvtps2ph (_src.VT _src.RC:$src1),
7518 itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>;
7519 let hasSideEffects = 0, mayStore = 1 in {
7520 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7521 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7522 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7523 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7524 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7525 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7526 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7527 [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7531 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7533 let hasSideEffects = 0 in
7534 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
7535 (outs _dest.RC:$dst),
7536 (ins _src.RC:$src1, i32u8imm:$src2),
7537 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
7538 [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
7541 let Predicates = [HasAVX512] in {
7542 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
7544 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
7545 SSE_CVT_PS2PH>, EVEX, EVEX_V512,
7546 EVEX_CD8<32, CD8VH>;
7547 let Predicates = [HasVLX] in {
7548 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
7549 SSE_CVT_PS2PH>, EVEX, EVEX_V256,
7550 EVEX_CD8<32, CD8VH>;
7551 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
7552 SSE_CVT_PS2PH>, EVEX, EVEX_V128,
7553 EVEX_CD8<32, CD8VH>;
7556 def : Pat<(store (f64 (extractelt
7557 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7558 (iPTR 0))), addr:$dst),
7559 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7560 def : Pat<(store (i64 (extractelt
7561 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7562 (iPTR 0))), addr:$dst),
7563 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7564 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7565 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
7566 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
7567 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
7570 // Patterns for matching conversions from float to half-float and vice versa.
7571 let Predicates = [HasVLX] in {
7572 // Use MXCSR.RC for rounding instead of explicitly specifying the default
7573 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7574 // configurations we support (the default). However, falling back to MXCSR is
7575 // more consistent with other instructions, which are always controlled by it.
7576 // It's encoded as 0b100.
7577 def : Pat<(fp_to_f16 FR32X:$src),
7578 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7579 (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
7581 def : Pat<(f16_to_fp GR16:$src),
7582 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7583 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
7585 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7586 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7587 (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7590 // Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// Register-register-only compare with suppress-all-exceptions ({sae}).
// hasSideEffects = 0 and an empty pattern: this form exists for the
// assembler/disassembler; EFLAGS clobbering comes from the instantiation
// site (see the `let Defs = [EFLAGS]` below).
7591 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
7592                             string OpcodeStr, OpndItins itins> {
7593   let hasSideEffects = 0 in
7594   def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
7595 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7596 [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
7597 Sched<[itins.Sched]>;
// {sae} forms of the scalar ordered/unordered compares: 0x2E = ucomis*,
// 0x2F = comis*.  SS uses the PS map, SD uses the PD map + VEX_W.
7600 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7601   defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
7602 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7603   defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
7604 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7605   defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
7606 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7607   defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
7608 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded versions of the SSE scalar compare instructions, reusing the
// shared sse12_ord_cmp / sse12_ord_cmp_int multiclasses.
7611 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7612   defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
7613 "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7614 EVEX_CD8<32, CD8VT1>;
7615   defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
7616 "ucomisd", SSE_COMIS>, PD, EVEX,
7617 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// comiss/comisd carry no selection pattern here (Pattern = []), they are
// selected through the intrinsic forms below.
7618   let Pattern = []<dag> in {
7619     defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
7620 "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7621 EVEX_CD8<32, CD8VT1>;
7622     defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
7623 "comisd", SSE_COMIS>, PD, EVEX,
7624 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Intrinsic (vector-operand) forms, codegen-only: matched from X86ucomi /
// X86comi nodes produced by the *_comi intrinsics.
7626   let isCodeGenOnly = 1 in {
7627     defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7628 sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7629 EVEX_CD8<32, CD8VT1>;
7630     defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7631 sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX,
7632 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7634     defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7635 sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7636 EVEX_CD8<32, CD8VT1>;
7637     defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7638 sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX,
7639 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7643 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision approximation instructions: a register-register
// form and a folded-load form taking a scalar memory operand.  Both are
// maskable and use the EVEX 4-operand (merging $src1) encoding.
7644 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7645                          OpndItins itins, X86VectorVTInfo _> {
7646   let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
7647   defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7648 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7649 "$src2, $src1", "$src1, $src2",
7650 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
7651 EVEX_4V, Sched<[itins.Sched]>;
7652   defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7653 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7654 "$src2, $src1", "$src1, $src2",
7655 (OpNode (_.VT _.RC:$src1),
7656 _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
7657 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Scalar approximate reciprocal (0x4D) and reciprocal-sqrt (0x4F); the SD
// forms add VEX_W for the 64-bit element width.
7661 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
7662 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7663 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
7664 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7665 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
7666 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7667 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
7668 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7670 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximation: register (r), full-vector load (m)
// and broadcast-load (mb, EVEX_B) forms, all maskable.
7671 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
7672                          OpndItins itins, X86VectorVTInfo _> {
7673   let ExeDomain = _.ExeDomain in {
7674   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7675 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7676 (_.FloatVT (OpNode _.RC:$src)), itins.rr>, EVEX, T8PD,
7677 Sched<[itins.Sched]>;
7678   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7679 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7681 (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX, T8PD,
7682 Sched<[itins.Sched.Folded, ReadAfterLd]>;
7683   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7684 (ins _.ScalarMemOp:$src), OpcodeStr,
7685 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7687 (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7688 EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_fp14_p for every vector length: 512-bit always, and
// 128/256-bit only under AVX512VL.
7692 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
7694   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
7695 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
7696   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
7697 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7699   // Define only if AVX512VL feature is present.
7700   let Predicates = [HasVLX] in {
7701     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7702 OpNode, itins.s, v4f32x_info>,
7703 EVEX_V128, EVEX_CD8<32, CD8VF>;
7704     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7705 OpNode, itins.s, v8f32x_info>,
7706 EVEX_V256, EVEX_CD8<32, CD8VF>;
7707     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7708 OpNode, itins.d, v2f64x_info>,
7709 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
7710     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7711 OpNode, itins.d, v4f64x_info>,
7712 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed approximate rsqrt (0x4E) and rcp (0x4C) across all vector lengths.
7716 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
7717 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
7719 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision (ER) approximation: plain form (MXCSR rounding,
// FROUND_CURRENT), {sae} form (rb, FROUND_NO_EXC, EVEX_B), and folded-load
// form (m).
7720 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7721                          SDNode OpNode, OpndItins itins> {
7722   let ExeDomain = _.ExeDomain in {
7723   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7724 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7725 "$src2, $src1", "$src1, $src2",
7726 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7727 (i32 FROUND_CURRENT)), itins.rr>,
7728 Sched<[itins.Sched]>;
7730   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7731 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7732 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7733 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7734 (i32 FROUND_NO_EXC)), itins.rm>, EVEX_B,
7735 Sched<[itins.Sched]>;
7737   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7738 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7739 "$src2, $src1", "$src1, $src2",
7740 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
7741 (i32 FROUND_CURRENT)), itins.rm>,
7742 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_fp28_s for both scalar element widths (ss/sd).
7746 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7748   defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
7749 EVEX_CD8<32, CD8VT1>;
7750   defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
7751 EVEX_CD8<64, CD8VT1>, VEX_W;
// Scalar ER instructions (rcp28/rsqrt28) require the ERI feature; vgetexp is
// defined outside the HasERI block since it is a base-AVX512 instruction.
7754 let Predicates = [HasERI] in {
7755   defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
7757   defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
7761 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
7763 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximation: register, full-vector load, and
// broadcast-load forms, all using MXCSR rounding (FROUND_CURRENT).  The
// {sae} register form lives in avx512_fp28_p_round below.
7765 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7766                          SDNode OpNode, OpndItins itins> {
7767   let ExeDomain = _.ExeDomain in {
7768   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7769 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7770 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
7771 itins.rr>, Sched<[itins.Sched]>;
7773   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7774 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7776 (bitconvert (_.LdFrag addr:$src))),
7777 (i32 FROUND_CURRENT)), itins.rm>,
7778 Sched<[itins.Sched.Folded, ReadAfterLd]>;
7780   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7781 (ins _.ScalarMemOp:$src), OpcodeStr,
7782 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7784 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
7785 (i32 FROUND_CURRENT)), itins.rm>, EVEX_B,
7786 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// {sae} (suppress-all-exceptions) register form of the packed ER ops,
// selected with FROUND_NO_EXC and encoded with EVEX.b set.
7789 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7790                          SDNode OpNode, OpndItins itins> {
7791   let ExeDomain = _.ExeDomain in
7792   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7793 (ins _.RC:$src), OpcodeStr,
7794 "{sae}, $src", "$src, {sae}",
7795 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
7796 itins.rr>, EVEX_B, Sched<[itins.Sched]>;
// 512-bit packed ER instantiation: plain + {sae} forms for ps and pd.
7799 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
7801   defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7802 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7803 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
7804   defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7805 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7806 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit (AVX512VL) instantiation of the packed unary op; no {sae}
// forms at these widths.
7809 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
7810                                   SDNode OpNode, SizeItins itins> {
7811   // Define only if AVX512VL feature is present.
7812   let Predicates = [HasVLX] in {
7813     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
7814 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
7815     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
7816 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
7817     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
7818 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7819     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
7820 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed ER instructions (512-bit only, HasERI); vgetexp additionally gets
// 128/256-bit forms via avx512_fp_unaryop_packed under plain AVX512/VLX.
7823 let Predicates = [HasERI] in {
7825   defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7826   defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7827   defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
7829 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
7830                avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
7831 SSE_ALU_ITINS_P>, EVEX;
// Packed sqrt with an explicit static rounding-mode operand ($rc),
// encoded via EVEX.b + EVEX_RC.
7833 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
7835   let ExeDomain = _.ExeDomain in
7836   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7837 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
7838 (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
7839 EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Packed sqrt (MXCSR rounding): register, full-vector load, and
// broadcast-load forms, all maskable and selected from the generic fsqrt.
7842 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
7844   let ExeDomain = _.ExeDomain in {
7845   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7846 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7847 (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
7848 Sched<[itins.Sched]>;
7849   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7850 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7852 (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
7853 Sched<[itins.Sched.Folded, ReadAfterLd]>;
7854   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7855 (ins _.ScalarMemOp:$src), OpcodeStr,
7856 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7858 (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7859 EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates packed sqrt for all vector lengths (128/256-bit under VLX).
7863 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
7864   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
7865 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7866   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
7867 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7868   // Define only if AVX512VL feature is present.
7869   let Predicates = [HasVLX] in {
7870     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7871 SSE_SQRTPS, v4f32x_info>,
7872 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7873     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7874 SSE_SQRTPS, v8f32x_info>,
7875 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7876     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7877 SSE_SQRTPD, v2f64x_info>,
7878 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7879     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7880 SSE_SQRTPD, v4f64x_info>,
7881 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Static-rounding packed sqrt exists only at 512 bits.
7885 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
7886   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS,
7887 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7888   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD,
7889 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: intrinsic (_Int) forms with current/static rounding, plus
// codegen-only FRC register forms for plain scalar fsqrt, and the patterns
// that select them.  SUFF ("SS"/"SD") rebuilds the sibling instruction names
// for the Pat results via NAME#SUFF.
7892 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
7893                               X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
7894   let ExeDomain = _.ExeDomain in {
7895   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7896 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7897 "$src2, $src1", "$src1, $src2",
7898 (X86fsqrtRnds (_.VT _.RC:$src1),
7900 (i32 FROUND_CURRENT)), itins.rr>,
7901 Sched<[itins.Sched]>;
7902   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7903 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7904 "$src2, $src1", "$src1, $src2",
7905 (X86fsqrtRnds (_.VT _.RC:$src1),
7906 _.ScalarIntMemCPat:$src2,
7907 (i32 FROUND_CURRENT)), itins.rm>,
7908 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Explicit static-rounding form (EVEX.b + EVEX_RC).
7909   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7910 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7911 "$rc, $src2, $src1", "$src1, $src2, $rc",
7912 (X86fsqrtRnds (_.VT _.RC:$src1),
7914 (i32 imm:$rc)), itins.rr>,
7915 EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Pattern-less FRC register forms used by the fsqrt selection patterns below.
7917   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7918   def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7919 (ins _.FRC:$src1, _.FRC:$src2),
7920 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rr>,
7921 Sched<[itins.Sched]>;
7923   def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7924 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7925 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rm>,
7926 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Select plain scalar fsqrt / the sqrt intrinsic onto the forms above; the
// unused merge operand is filled with IMPLICIT_DEF.
7930   let Predicates = [HasAVX512] in {
7931   def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
7932             (!cast<Instruction>(NAME#SUFF#Zr)
7933 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
7935   def : Pat<(Intr VR128X:$src),
7936             (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
// Load-folding variants only when optimizing for size.
7940   let Predicates = [HasAVX512, OptForSize] in {
7941   def : Pat<(_.EltVT (fsqrt (load addr:$src))),
7942             (!cast<Instruction>(NAME#SUFF#Zm)
7943 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
7945   def : Pat<(Intr _.ScalarIntMemCPat:$src2),
7946             (!cast<Instruction>(NAME#SUFF#Zm_Int)
7947 (_.VT (IMPLICIT_DEF)), addr:$src2)>;
// Instantiates scalar sqrt for ss (XS map) and sd (XD map, VEX_W).
7952 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
7953   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS",
7954 int_x86_sse_sqrt_ss>,
7955 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
7956   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD",
7957 int_x86_sse2_sqrt_sd>,
7958 EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
// vsqrt: packed forms (plain + static rounding) and scalar forms (opcode 0x51).
7962 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
7963              avx512_sqrt_packed_all_round<0x51, "vsqrt">;
7965 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
// Scalar vrndscale: intrinsic forms (plain / {sae} / folded-load), codegen-only
// FRC forms, and selection patterns mapping the generic rounding nodes to
// rndscale immediates.
7967 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
7968                                   OpndItins itins, X86VectorVTInfo _> {
7969   let ExeDomain = _.ExeDomain in {
7970   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7971 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7972 "$src3, $src2, $src1", "$src1, $src2, $src3",
7973 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7974 (i32 imm:$src3))), itins.rr>,
7975 Sched<[itins.Sched]>;
7977   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7978 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7979 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
7980 (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7981 (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
7982 Sched<[itins.Sched]>;
7984   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7985 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
7987 "$src3, $src2, $src1", "$src1, $src2, $src3",
7988 (_.VT (X86RndScales _.RC:$src1,
7989 _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
7990 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Pattern-less FRC forms targeted by the Pats below.
7992   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7993   def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7994 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
7995 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7996 [], itins.rr>, Sched<[itins.Sched]>;
7999   def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8000 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8001 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
8002 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Immediate encodings: 0x9 = floor, 0xA = ceil, 0xB = trunc, 0x4 = rint
// (current mode), 0xC = nearbyint (current mode, no exceptions).
8006   let Predicates = [HasAVX512] in {
8007   def : Pat<(ffloor _.FRC:$src),
8008             (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8009 _.FRC:$src, (i32 0x9)))>;
8010   def : Pat<(fceil _.FRC:$src),
8011             (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8012 _.FRC:$src, (i32 0xa)))>;
8013   def : Pat<(ftrunc _.FRC:$src),
8014             (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8015 _.FRC:$src, (i32 0xb)))>;
8016   def : Pat<(frint _.FRC:$src),
8017             (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8018 _.FRC:$src, (i32 0x4)))>;
8019   def : Pat<(fnearbyint _.FRC:$src),
8020             (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
8021 _.FRC:$src, (i32 0xc)))>;
// Same rounding operations with a folded scalar load, only when optimizing
// for size.
8024   let Predicates = [HasAVX512, OptForSize] in {
8025   def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
8026             (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8027 addr:$src, (i32 0x9)))>;
8028   def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
8029             (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8030 addr:$src, (i32 0xa)))>;
8031   def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
8032             (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8033 addr:$src, (i32 0xb)))>;
8034   def : Pat<(frint (_.ScalarLdFrag addr:$src)),
8035             (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8036 addr:$src, (i32 0x4)))>;
8037   def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
8038             (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
8039 addr:$src, (i32 0xc)))>;
// vrndscaless (0x0A) / vrndscalesd (0x0B) instantiations.
8043 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
8044 f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
8046 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
8047 f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
8048 EVEX_CD8<64, CD8VT1>;
8050 //-------------------------------------------------
8051 // Integer truncate and extend operations
8052 //-------------------------------------------------
// Itinerary bundles shared by the extend/truncate instructions below; both
// are scheduled as 256-bit shuffles.
8054 let Sched = WriteShuffle256 in
8055 def AVX512_EXTEND : OpndItins<
8056   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
8059 let Sched = WriteShuffle256 in
8060 def AVX512_TRUNCATE : OpndItins<
8061   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
// Truncate (vpmov*): maskable reg-reg form plus pattern-less store forms
// (mr, and mrk with a writemask); the store forms are matched by
// avx512_trunc_mr_lowering below.
8064 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8065                                OpndItins itins, X86VectorVTInfo SrcInfo,
8066                                X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
8067   let ExeDomain = DestInfo.ExeDomain in
8068   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8069 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8070 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8071 itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
8073   let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8074       ExeDomain = DestInfo.ExeDomain in {
8075     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8076 (ins x86memop:$dst, SrcInfo.RC:$src),
8077 OpcodeStr # "\t{$src, $dst|$dst, $src}",
8078 [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
8080     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8081 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8082 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8083 [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
8084   }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
// Patterns selecting plain and masked truncating stores onto the mr/mrk
// forms defined in avx512_trunc_common (names rebuilt via NAME#ZSuffix).
8087 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8088                                     X86VectorVTInfo DestInfo,
8089                                     PatFrag truncFrag, PatFrag mtruncFrag > {
8091   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8092             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8093 addr:$dst, SrcInfo.RC:$src)>;
8095   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8096 (SrcInfo.VT SrcInfo.RC:$src)),
8097             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8098 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Full truncate family: 128/256-bit variants under [HasVLX, prd] and the
// 512-bit variant under [prd] (prd defaults to HasAVX512; VPMOVWB passes
// HasBWI).  Each width pairs the instruction defs with its store-lowering
// patterns.
8101 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
8102           OpndItins itins, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
8103           X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8104           X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8105           X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
8106           Predicate prd = HasAVX512>{
8108   let Predicates = [HasVLX, prd] in {
8109     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8110 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
8111                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8112 truncFrag, mtruncFrag>, EVEX_V128;
8114     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8115 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
8116                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8117 truncFrag, mtruncFrag>, EVEX_V256;
8119   let Predicates = [prd] in
8120     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8121 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
8122                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8123 truncFrag, mtruncFrag>, EVEX_V512;
// qword -> byte truncate (octo-fraction tuple, CD8VO).
8126 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8127                            OpndItins itins, PatFrag StoreNode,
8128                            PatFrag MaskedStoreNode> {
8129   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8130                v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
8131                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// qword -> word truncate (quarter tuple, CD8VQ).
8134 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8135                            OpndItins itins, PatFrag StoreNode,
8136                            PatFrag MaskedStoreNode> {
8137   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8138                v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
8139                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// qword -> dword truncate (half tuple, CD8VH).
8142 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8143                            OpndItins itins, PatFrag StoreNode,
8144                            PatFrag MaskedStoreNode> {
8145   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8146                v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
8147                StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// dword -> byte truncate (quarter tuple, CD8VQ).
8150 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
8151                            OpndItins itins, PatFrag StoreNode,
8152                            PatFrag MaskedStoreNode> {
8153   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8154                v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
8155                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// dword -> word truncate (half tuple, CD8VH).
8158 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8159                            OpndItins itins, PatFrag StoreNode,
8160                            PatFrag MaskedStoreNode> {
8161   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8162                v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
8163                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// word -> byte truncate; requires BWI (passed as the prd predicate).
8166 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8167                            OpndItins itins, PatFrag StoreNode,
8168                            PatFrag MaskedStoreNode> {
8169   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i16_info,
8170                v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
8171                StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Truncate instantiations.  Opcode high nibble encodes the flavor:
// 0x3x = plain truncate (vpmov*), 0x2x = signed saturate (vpmovs*),
// 0x1x = unsigned saturate (vpmovus*).
8174 defm VPMOVQB    : avx512_trunc_qb<0x32, "vpmovqb",   X86vtrunc, AVX512_TRUNCATE,
8175 truncstorevi8, masked_truncstorevi8>;
8176 defm VPMOVSQB   : avx512_trunc_qb<0x22, "vpmovsqb",  X86vtruncs, AVX512_TRUNCATE,
8177 truncstore_s_vi8, masked_truncstore_s_vi8>;
8178 defm VPMOVUSQB  : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
8179 truncstore_us_vi8, masked_truncstore_us_vi8>;
8181 defm VPMOVQW    : avx512_trunc_qw<0x34, "vpmovqw",   X86vtrunc, AVX512_TRUNCATE,
8182 truncstorevi16, masked_truncstorevi16>;
8183 defm VPMOVSQW   : avx512_trunc_qw<0x24, "vpmovsqw",  X86vtruncs, AVX512_TRUNCATE,
8184 truncstore_s_vi16, masked_truncstore_s_vi16>;
8185 defm VPMOVUSQW  : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
8186 truncstore_us_vi16, masked_truncstore_us_vi16>;
8188 defm VPMOVQD    : avx512_trunc_qd<0x35, "vpmovqd",   X86vtrunc, AVX512_TRUNCATE,
8189 truncstorevi32, masked_truncstorevi32>;
8190 defm VPMOVSQD   : avx512_trunc_qd<0x25, "vpmovsqd",  X86vtruncs, AVX512_TRUNCATE,
8191 truncstore_s_vi32, masked_truncstore_s_vi32>;
8192 defm VPMOVUSQD  : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
8193 truncstore_us_vi32, masked_truncstore_us_vi32>;
8195 defm VPMOVDB    : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, AVX512_TRUNCATE,
8196 truncstorevi8, masked_truncstorevi8>;
8197 defm VPMOVSDB   : avx512_trunc_db<0x21, "vpmovsdb",   X86vtruncs, AVX512_TRUNCATE,
8198 truncstore_s_vi8, masked_truncstore_s_vi8>;
8199 defm VPMOVUSDB  : avx512_trunc_db<0x11, "vpmovusdb",  X86vtruncus, AVX512_TRUNCATE,
8200 truncstore_us_vi8, masked_truncstore_us_vi8>;
8202 defm VPMOVDW    : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, AVX512_TRUNCATE,
8203 truncstorevi16, masked_truncstorevi16>;
8204 defm VPMOVSDW   : avx512_trunc_dw<0x23, "vpmovsdw",   X86vtruncs, AVX512_TRUNCATE,
8205 truncstore_s_vi16, masked_truncstore_s_vi16>;
8206 defm VPMOVUSDW  : avx512_trunc_dw<0x13, "vpmovusdw",  X86vtruncus, AVX512_TRUNCATE,
8207 truncstore_us_vi16, masked_truncstore_us_vi16>;
8209 defm VPMOVWB    : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, AVX512_TRUNCATE,
8210 truncstorevi8, masked_truncstorevi8>;
8211 defm VPMOVSWB   : avx512_trunc_wb<0x20, "vpmovswb",   X86vtruncs, AVX512_TRUNCATE,
8212 truncstore_s_vi8, masked_truncstore_s_vi8>;
8213 defm VPMOVUSWB  : avx512_trunc_wb<0x10, "vpmovuswb",  X86vtruncus, AVX512_TRUNCATE,
8214 truncstore_us_vi8, masked_truncstore_us_vi8>;
// Without VLX, 256-bit truncates are widened: insert the source into a
// 512-bit register, use the Z-form instruction, and extract the low half.
8216 let Predicates = [HasAVX512, NoVLX] in {
8217 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
8218          (v8i16 (EXTRACT_SUBREG
8219 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8220 VR256X:$src, sub_ymm)))), sub_xmm))>;
8221 def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
8222          (v4i32 (EXTRACT_SUBREG
8223 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8224 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for the word->byte truncate, which needs BWI.
8227 let Predicates = [HasBWI, NoVLX] in {
8228 def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
8229          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
8230 VR256X:$src, sub_ymm))), sub_xmm))>;
// Sign/zero extend (vpmovsx*/vpmovzx*): maskable register form and a
// load-folding form using the supplied extending-load fragment.
8233 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
8234             X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
8235             X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
8236   let ExeDomain = DestInfo.ExeDomain in {
8237   defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8238 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
8239 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
8240 EVEX, Sched<[itins.Sched]>;
8242   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8243 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
8244 (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
8245 EVEX, Sched<[itins.Sched.Folded]>;
// byte -> word extend.  The 128-bit form uses InVecNode (only the low
// elements of the v16i8 source are extended); wider forms use OpNode.
8249 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
8250           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8251           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8252   let Predicates = [HasVLX, HasBWI] in {
8253     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
8254 v16i8x_info, i64mem, LdFrag, InVecNode>,
8255 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8257     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
8258 v16i8x_info, i128mem, LdFrag, OpNode>,
8259 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8261   let Predicates = [HasBWI] in {
8262     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
8263 v32i8x_info, i256mem, LdFrag, OpNode>,
8264 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// byte -> dword extend (quarter tuple, CD8VQ).
8268 multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
8269           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8270           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8271   let Predicates = [HasVLX, HasAVX512] in {
8272     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8273 v16i8x_info, i32mem, LdFrag, InVecNode>,
8274 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8276     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8277 v16i8x_info, i64mem, LdFrag, OpNode>,
8278 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8280   let Predicates = [HasAVX512] in {
8281     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8282 v16i8x_info, i128mem, LdFrag, OpNode>,
8283 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// byte -> qword extend (octo-fraction tuple, CD8VO).
8287 multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
8288           SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8289           OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8290   let Predicates = [HasVLX, HasAVX512] in {
8291     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8292 v16i8x_info, i16mem, LdFrag, InVecNode>,
8293 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
8295     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8296 v16i8x_info, i32mem, LdFrag, OpNode>,
8297 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
8299   let Predicates = [HasAVX512] in {
8300     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8301 v16i8x_info, i64mem, LdFrag, OpNode>,
8302 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// word -> dword extend (half tuple, CD8VH).
8306 multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
8307          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8308          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8309   let Predicates = [HasVLX, HasAVX512] in {
8310     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8311 v8i16x_info, i64mem, LdFrag, InVecNode>,
8312 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8314     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8315 v8i16x_info, i128mem, LdFrag, OpNode>,
8316 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8318   let Predicates = [HasAVX512] in {
8319     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8320 v16i16x_info, i256mem, LdFrag, OpNode>,
8321 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// word -> qword extend (quarter tuple, CD8VQ).
8325 multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
8326          SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8327          OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8328   let Predicates = [HasVLX, HasAVX512] in {
8329     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8330 v8i16x_info, i32mem, LdFrag, InVecNode>,
8331 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8333     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8334 v8i16x_info, i64mem, LdFrag, OpNode>,
8335 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8337   let Predicates = [HasAVX512] in {
8338     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8339 v8i16x_info, i128mem, LdFrag, OpNode>,
8340 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
8344 multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
8345 SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8346 OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
8348 let Predicates = [HasVLX, HasAVX512] in {
8349 defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8350 v4i32x_info, i64mem, LdFrag, InVecNode>,
8351 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8353 defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8354 v4i32x_info, i128mem, LdFrag, OpNode>,
8355 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8357 let Predicates = [HasAVX512] in {
8358 defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8359 v8i32x_info, i256mem, LdFrag, OpNode>,
8360 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
8364 defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8365 defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8366 defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8367 defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8368 defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8369 defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8371 defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8372 defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8373 defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8374 defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8375 defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8376 defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
// AVX512_pmovx_patterns: fold narrow loads (scalar_to_vector of loadi32/64,
// vzmovl/vzload, or full vector loads via bitconvert) feeding a sign/zero
// extend into the memory (..rm) forms of the VPMOV[SZ]X* instructions
// defined above.  InVecOp matches the partial-source extends used by the
// 128-bit forms; ExtOp matches the full vector_extend for wider forms.
// ExtLoad16 is the 16-bit load fragment used by the B->Q 128-bit pattern
// (anyext for ZX, extload for SX).
8379 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8380 SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit byte->word patterns need BWI in addition to VLX.
8382 let Predicates = [HasVLX, HasBWI] in {
8383 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8384 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8385 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8386 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8387 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8388 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8389 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8390 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8391 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8392 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// Remaining 128-bit folded-load patterns (BD, BQ, WD, WQ, DQ).
8394 let Predicates = [HasVLX] in {
8395 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8396 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8397 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8398 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8399 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8400 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8401 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8402 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
// B->Q only reads two source bytes, hence the 16-bit load fragment.
8404 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
8405 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8406 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8407 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8408 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8409 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8410 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8411 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8413 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8414 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8415 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8416 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8417 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8418 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8419 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8420 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8421 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8422 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8424 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8425 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8426 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
8427 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8428 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8429 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8430 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8431 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8433 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8434 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8435 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8436 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8437 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8438 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8439 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
8440 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8441 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
8442 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit folded-load patterns; these use the full-width ExtOp.
8445 let Predicates = [HasVLX, HasBWI] in {
8446 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8447 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8448 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8449 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8450 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8451 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8453 let Predicates = [HasVLX] in {
8454 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8455 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8456 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8457 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8458 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8459 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8460 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8461 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8463 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8464 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8465 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8466 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8467 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8468 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8469 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8470 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8472 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8473 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8474 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8475 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8476 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8477 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8479 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8480 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8481 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8482 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8483 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8484 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8485 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8486 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8488 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8489 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8490 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8491 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8492 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8493 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit folded-load patterns.
8496 let Predicates = [HasBWI] in {
8497 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8498 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
8500 let Predicates = [HasAVX512] in {
8501 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8502 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8504 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8505 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8506 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8507 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8509 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8510 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8512 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8513 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8515 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8516 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Instantiate the load-folding patterns for the sign- and zero-extend
// instruction families.
8520 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8521 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
8523 //===----------------------------------------------------------------------===//
8524 // GATHER - SCATTER Operations
8526 // FIXME: Improve scheduling of gather/scatter instructions.
// One masked-gather memory instruction.  $dst is early-clobber and tied to
// $src1 (the pass-through vector); the mask is both consumed and written
// back through $mask_wb.  NOTE(review): per the ISA, gather clears mask bits
// as elements complete — the tied $mask_wb models that; confirm against the
// Intel SDM when modifying.
8527 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8528 X86MemOperand memop, PatFrag GatherNode,
8529 RegisterClass MaskRC = _.KRCWM> {
8530 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8531 ExeDomain = _.ExeDomain in
8532 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
8533 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
8534 !strconcat(OpcodeStr#_.Suffix,
8535 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
8536 [(set _.RC:$dst, MaskRC:$mask_wb,
8537 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
8538 vectoraddr:$src2))]>, EVEX, EVEX_K,
8539 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// 64-bit-element gathers (PD and Q): dword-indexed (dopc) and qword-indexed
// (qopc) forms at 512/256/128 bits.  All carry VEX_W (64-bit elements).
8542 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8543 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8544 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
8545 vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
8546 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
8547 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
8548 let Predicates = [HasVLX] in {
8549 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8550 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
8551 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
8552 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
8553 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8554 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
8555 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8556 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// 32-bit-element gathers (PS and D).  Qword-indexed forms produce only a
// half-width data vector (e.g. zmm indices gather 8 x 32-bit elements into
// a ymm), which is why the qopc rows pair a wider EVEX_V* with a narrower
// _.info / memory operand.
8560 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8561 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8562 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
8563 mgatherv16i32>, EVEX_V512;
8564 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8565 mgatherv8i64>, EVEX_V512;
8566 let Predicates = [HasVLX] in {
8567 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8568 vy256xmem, mgatherv8i32>, EVEX_V256;
8569 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8570 vy128xmem, mgatherv4i64>, EVEX_V256;
8571 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8572 vx128xmem, mgatherv4i32>, EVEX_V128;
8573 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8574 vx64xmem, mgatherv2i64, VK2WM>,
// Floating-point and integer gather instantiations.
8580 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8581 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8583 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8584 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// One masked-scatter memory instruction (store form).  Mirrors
// avx512_gather: the mask is consumed and written back via the tied
// $mask_wb operand.
8586 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8587 X86MemOperand memop, PatFrag ScatterNode> {
8589 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
8591 def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
8592 (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
8593 !strconcat(OpcodeStr#_.Suffix,
8594 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
8595 [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8596 _.KRCWM:$mask, vectoraddr:$dst))]>,
8597 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8598 Sched<[WriteStore]>;
// 64-bit-element scatters (PD and Q); structure parallels
// avx512_gather_q_pd above.
8601 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8602 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8603 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
8604 vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
8605 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
8606 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
8607 let Predicates = [HasVLX] in {
8608 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8609 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
8610 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
8611 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
8612 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8613 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
8614 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8615 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// 32-bit-element scatters (PS and D); as with gathers, qword-indexed forms
// store only a half-width data vector.
8619 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8620 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8621 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
8622 mscatterv16i32>, EVEX_V512;
8623 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8624 mscatterv8i64>, EVEX_V512;
8625 let Predicates = [HasVLX] in {
8626 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8627 vy256xmem, mscatterv8i32>, EVEX_V256;
8628 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8629 vy128xmem, mscatterv4i64>, EVEX_V256;
8630 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8631 vx128xmem, mscatterv4i32>, EVEX_V128;
8632 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8633 vx64xmem, mscatterv2i64>, EVEX_V128;
// Floating-point and integer scatter instantiations.
8637 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8638 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
8640 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8641 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch hints (AVX512PF).  These have no pattern — the
// empty [] plus hasSideEffects = 1 keeps them from being optimized away.
// The Format (MRM1m..MRM6m) selects the hint via the ModRM reg field.
8644 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8645 RegisterClass KRC, X86MemOperand memop> {
8646 let Predicates = [HasPFI], hasSideEffects = 1 in
8647 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
8648 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
8649 [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
// Gather prefetches, hint 0 (MRM1m) and hint 1 (MRM2m).  Note the qps forms
// use a 512-bit index vector with a 256-bit data footprint (vz256xmem).
8652 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
8653 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8655 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
8656 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8658 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
8659 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8661 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
8662 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8664 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
8665 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8667 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
8668 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8670 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
8671 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8673 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
8674 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Scatter prefetches, hint 0 (MRM5m) and hint 1 (MRM6m).
8676 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
8677 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8679 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
8680 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8682 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
8683 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8685 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
8686 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8688 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
8689 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8691 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
8692 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8694 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
8695 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8697 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
8698 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// VPMOVM2* building block: one register-to-register def that materializes a
// mask register as a vector (modeled as a sign-extend of the k-register, so
// set mask bits become all-ones elements).
8700 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
8701 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
8702 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
8703 [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
8704 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8707 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Pattern that performs the 512-bit instruction (NAME#"Zrr") on a widened
// mask, then extracts the requested 128/256-bit subvector of the result.
8708 multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
8709 X86VectorVTInfo _> {
8711 def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
8712 (X86Info.VT (EXTRACT_SUBREG
8713 (_.VT (!cast<Instruction>(NAME#"Zrr")
8714 (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
8715 X86Info.SubRegIdx))>;
// Tie together the 512/256/128-bit mask-to-vector forms; the _Alt entries
// are the NoVLX lowerings via the 512-bit instruction.
8718 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8719 string OpcodeStr, Predicate prd> {
8720 let Predicates = [prd] in
8721 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8723 let Predicates = [prd, HasVLX] in {
8724 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8725 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8727 let Predicates = [prd, NoVLX] in {
8728 defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
8729 defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
// VPMOVM2{B,W} need BWI; VPMOVM2{D,Q} need DQI.  W and Q carry VEX_W.
8734 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8735 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8736 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8737 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// VPMOV*2M building block: convert a vector to a mask register
// (X86cvt2mask node).
8739 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
8740 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8741 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8742 [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))],
8743 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8746 // Use 512bit version to implement 128/256 bit in case NoVLX.
// NoVLX lowering: widen the source with INSERT_SUBREG into an undef
// 512-bit vector, run the 512-bit instruction, then copy the mask result
// back to the narrow mask register class.
8747 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
8748 X86VectorVTInfo _> {
8750 def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
8751 (_.KVT (COPY_TO_REGCLASS
8752 (!cast<Instruction>(NAME#"Zrr")
8753 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
8754 _.RC:$src, _.SubRegIdx)),
// Tie together 512/256/128-bit vector-to-mask forms plus NoVLX lowerings.
8758 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
8759 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8760 let Predicates = [prd] in
8761 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8764 let Predicates = [prd, HasVLX] in {
8765 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
8767 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
8770 let Predicates = [prd, NoVLX] in {
8771 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8772 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
// VPMOV{B,W}2M need BWI; VPMOV{D,Q}2M need DQI.  W and Q carry VEX_W.
8776 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8777 avx512vl_i8_info, HasBWI>;
8778 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8779 avx512vl_i16_info, HasBWI>, VEX_W;
8780 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8781 avx512vl_i32_info, HasDQI>;
8782 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8783 avx512vl_i64_info, HasDQI>, VEX_W;
8785 //===----------------------------------------------------------------------===//
8786 // AVX-512 - COMPRESS and EXPAND
8789 // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Itineraries for compress/expand; both currently reuse the integer-ALU
// itinerary classes with the WriteShuffle256 scheduling resource.
8790 let Sched = WriteShuffle256 in {
8791 def AVX512_COMPRESS : OpndItins<
8792 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8794 def AVX512_EXPAND : OpndItins<
8795 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
// Compress instruction at one vector width: maskable reg-reg form plus
// plain and masked store forms (the store forms have no patterns; the
// masked store is matched separately via X86mCompressingStore below).
8799 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
8800 string OpcodeStr, OpndItins itins> {
8801 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
8802 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8803 (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8804 Sched<[itins.Sched]>;
8806 let mayStore = 1, hasSideEffects = 0 in
8807 def mr : AVX5128I<opc, MRMDestMem, (outs),
8808 (ins _.MemOp:$dst, _.RC:$src),
8809 OpcodeStr # "\t{$src, $dst|$dst, $src}",
8810 []>, EVEX_CD8<_.EltSize, CD8VT1>,
8811 Sched<[itins.Sched.Folded]>;
8813 def mrk : AVX5128I<opc, MRMDestMem, (outs),
8814 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
8815 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8817 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8818 Sched<[itins.Sched.Folded]>;
// Map the masked compressing-store node onto the masked store (mrk) form.
8821 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
8822 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8824 (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8825 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
// Instantiate compress at 512 bits, and 256/128 bits under VLX.
8828 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
8830 AVX512VLVectorVTInfo VTInfo,
8831 Predicate Pred = HasAVX512> {
8832 let Predicates = [Pred] in
8833 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
8834 compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8836 let Predicates = [Pred, HasVLX] in {
8837 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
8838 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8839 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
8840 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8844 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
8845 avx512vl_i32_info>, EVEX;
8846 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
8847 avx512vl_i64_info>, EVEX, VEX_W;
8848 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
8849 avx512vl_f32_info>, EVEX;
8850 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
8851 avx512vl_f64_info>, EVEX, VEX_W;
// Expand instruction at one vector width: maskable reg-reg and reg-mem
// forms (the load form folds _.LdFrag through X86expand).
8854 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
8855 string OpcodeStr, OpndItins itins> {
8856 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8857 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8858 (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
8859 Sched<[itins.Sched]>;
8861 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8862 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8863 (_.VT (X86expand (_.VT (bitconvert
8864 (_.LdFrag addr:$src1))))), itins.rm>,
8865 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
8866 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Map masked expanding loads onto the zero-masked (rmkz) and merge-masked
// (rmk) load forms, depending on the pass-through value.
8869 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8871 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8872 (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8873 _.KRCWM:$mask, addr:$src)>;
8875 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8876 (_.VT _.RC:$src0))),
8877 (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8878 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
// Instantiate expand at 512 bits, and 256/128 bits under VLX.
8881 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
8883 AVX512VLVectorVTInfo VTInfo,
8884 Predicate Pred = HasAVX512> {
8885 let Predicates = [Pred] in
8886 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
8887 expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8889 let Predicates = [Pred, HasVLX] in {
8890 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
8891 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8892 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
8893 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
8897 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
8898 avx512vl_i32_info>, EVEX;
8899 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
8900 avx512vl_i64_info>, EVEX, VEX_W;
8901 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
8902 avx512vl_f32_info>, EVEX;
8903 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
8904 avx512vl_f64_info>, EVEX, VEX_W;
8906 // Handles instructions of the form: reg_vec1 = op(reg_vec, imm)
8908 //                                              op(broadcast(eltVT), imm)
8909 // All instructions are created with FROUND_CURRENT.
// Unary packed-FP-with-immediate: register (rri), folded-load (rmi), and
// broadcast-load (rmbi) forms; all are maskable.
8910 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8911 OpndItins itins, X86VectorVTInfo _> {
8912 let ExeDomain = _.ExeDomain in {
8913 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8914 (ins _.RC:$src1, i32u8imm:$src2),
8915 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8916 (OpNode (_.VT _.RC:$src1),
8917 (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
8918 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8919 (ins _.MemOp:$src1, i32u8imm:$src2),
8920 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8921 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
8922 (i32 imm:$src2)), itins.rm>,
8923 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8924 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8925 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
8926 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
8927 "${src1}"##_.BroadcastStr##", $src2",
8928 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
8929 (i32 imm:$src2)), itins.rm>, EVEX_B,
8930 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8934 // Handles instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Unary SAE variant: register-only form with exceptions suppressed
// (FROUND_NO_EXC); {sae} is rendered in the assembly string and EVEX_B
// selects the SAE encoding.
8935 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8936 SDNode OpNode, OpndItins itins,
8937 X86VectorVTInfo _> {
8938 let ExeDomain = _.ExeDomain in
8939 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8940 (ins _.RC:$src1, i32u8imm:$src2),
8941 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
8942 "$src1, {sae}, $src2",
8943 (OpNode (_.VT _.RC:$src1),
8945 (i32 FROUND_NO_EXC)), itins.rr>,
8946 EVEX_B, Sched<[itins.Sched]>;
// Combine the unary packed-imm forms: 512-bit (plus SAE) under prd, and
// 128/256-bit under prd + VLX.
8949 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
8950 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
8951 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
8952 let Predicates = [prd] in {
8953 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8955 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
8956 itins, _.info512>, EVEX_V512;
8958 let Predicates = [prd, HasVLX] in {
8959 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8960 _.info128>, EVEX_V128;
8961 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8962 _.info256>, EVEX_V256;
8966 // Handles instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm)
8967 //                                              op(reg_vec2, mem_vec, imm)
8968 //                                              op(reg_vec2, broadcast(eltVT), imm)
8969 // All instructions are created with FROUND_CURRENT.
// Binary packed-FP-with-immediate: rri / rmi / rmbi forms.
8970 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8971 OpndItins itins, X86VectorVTInfo _>{
8972 let ExeDomain = _.ExeDomain in {
8973 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8974 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8975 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8976 (OpNode (_.VT _.RC:$src1),
8978 (i32 imm:$src3)), itins.rr>,
8979 Sched<[itins.Sched]>;
8980 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8981 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
8982 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8983 (OpNode (_.VT _.RC:$src1),
8984 (_.VT (bitconvert (_.LdFrag addr:$src2))),
8985 (i32 imm:$src3)), itins.rm>,
8986 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8987 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8988 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8989 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8990 "$src1, ${src2}"##_.BroadcastStr##", $src3",
8991 (OpNode (_.VT _.RC:$src1),
8992 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8993 (i32 imm:$src3)), itins.rm>, EVEX_B,
8994 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8998 // Handles instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm)
8999 //                                              op(reg_vec2, mem_vec, imm)
// Two-operand-plus-i8-imm forms where source and destination types may
// differ (DestInfo vs SrcInfo); note the immediate is i8, not i32u8.
9000 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9001 OpndItins itins, X86VectorVTInfo DestInfo,
9002 X86VectorVTInfo SrcInfo>{
9003 let ExeDomain = DestInfo.ExeDomain in {
9004 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9005 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
9006 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9007 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9008 (SrcInfo.VT SrcInfo.RC:$src2),
9009 (i8 imm:$src3))), itins.rr>,
9010 Sched<[itins.Sched]>;
9011 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9012 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
9013 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9014 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
9015 (SrcInfo.VT (bitconvert
9016 (SrcInfo.LdFrag addr:$src2))),
9017 (i8 imm:$src3))), itins.rm>,
9018 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9022 // Handles instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm)
9023 //                                              op(reg_vec2, mem_vec, imm)
9024 //                                              op(reg_vec2, broadcast(eltVT), imm)
// Same-type variant of avx512_3Op_rm_imm8 that additionally provides the
// broadcast-load (rmbi) form.
9025 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9026 OpndItins itins, X86VectorVTInfo _>:
9027 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
9029 let ExeDomain = _.ExeDomain in
9030 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9031 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9032 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9033 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9034 (OpNode (_.VT _.RC:$src1),
9035 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
9036 (i8 imm:$src3)), itins.rm>, EVEX_B,
9037 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9040 // Handles scalar instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm)
9041 //                                                     op(reg_vec2, mem_scalar, imm)
// Scalar FP-with-immediate: register and scalar-memory-load forms (the
// scalar load is inserted via scalar_to_vector).
9042 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9043 OpndItins itins, X86VectorVTInfo _> {
9044 let ExeDomain = _.ExeDomain in {
9045 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9046 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9047 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9048 (OpNode (_.VT _.RC:$src1),
9050 (i32 imm:$src3)), itins.rr>,
9051 Sched<[itins.Sched]>;
9052 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9053 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9054 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9055 (OpNode (_.VT _.RC:$src1),
9056 (_.VT (scalar_to_vector
9057 (_.ScalarLdFrag addr:$src2))),
9058 (i32 imm:$src3)), itins.rm>,
9059 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9063 // Handles instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Binary packed SAE variant (FROUND_NO_EXC, EVEX_B).
9064 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
9065 SDNode OpNode, OpndItins itins,
9066 X86VectorVTInfo _> {
9067 let ExeDomain = _.ExeDomain in
9068 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9069 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9070 OpcodeStr, "$src3, {sae}, $src2, $src1",
9071 "$src1, $src2, {sae}, $src3",
9072 (OpNode (_.VT _.RC:$src1),
9075 (i32 FROUND_NO_EXC)), itins.rr>,
9076 EVEX_B, Sched<[itins.Sched]>;
9079 // Handles scalar instructions of the form: reg_vec1 = op(reg_vec2, reg_vec3, imm), {sae}
// Scalar SAE variant (FROUND_NO_EXC, EVEX_B).
9080 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9081 OpndItins itins, X86VectorVTInfo _> {
9082 let ExeDomain = _.ExeDomain in
9083 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9084 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9085 OpcodeStr, "$src3, {sae}, $src2, $src1",
9086 "$src1, $src2, {sae}, $src3",
9087 (OpNode (_.VT _.RC:$src1),
9090 (i32 FROUND_NO_EXC)), itins.rr>,
9091 EVEX_B, Sched<[itins.Sched]>;
9094 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
9095 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
9096 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9097 let Predicates = [prd] in {
9098 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
9099 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
9103 let Predicates = [prd, HasVLX] in {
9104 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
9106 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
9111 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
9112 OpndItins itins, AVX512VLVectorVTInfo DestInfo,
9113 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
9114 let Predicates = [Pred] in {
9115 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
9116 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9118 let Predicates = [Pred, HasVLX] in {
9119 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
9120 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
9121 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
9122 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
9126 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
9127 bits<8> opc, SDNode OpNode, OpndItins itins,
9128 Predicate Pred = HasAVX512> {
9129 let Predicates = [Pred] in {
9130 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
9132 let Predicates = [Pred, HasVLX] in {
9133 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
9134 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
9138 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
9139 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
9140 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9141 let Predicates = [prd] in {
9142 defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
9143 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
9147 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
9148 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
9149 SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
9150 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
9151 opcPs, OpNode, OpNodeRnd, itins.s, prd>,
9152 EVEX_CD8<32, CD8VF>;
9153 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
9154 opcPd, OpNode, OpNodeRnd, itins.d, prd>,
9155 EVEX_CD8<64, CD8VF>, VEX_W;
// Unary packed-FP ops with an 8-bit immediate control, instantiated for both
// f32 (PS) and f64 (PD) element widths. VREDUCE requires DQI; VRNDSCALE and
// VGETMANT only require base AVX512.
9158 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
9159 X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
9160 AVX512AIi8Base, EVEX;
// Note: VRNDSCALE uses distinct opcodes for PS (0x08) and PD (0x09).
9161 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
9162 X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
9163 AVX512AIi8Base, EVEX;
9164 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
9165 X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
9166 AVX512AIi8Base, EVEX;
// VRANGEPD/VRANGEPS (opcode 0x50): packed FP range operation with imm8
// control, DQI-only. VEX_W marks the 64-bit-element (PD) form.
9168 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
9169 0x50, X86VRange, X86VRangeRnd,
9170 SSE_ALU_F64P, HasDQI>,
9171 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9172 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
9173 0x50, X86VRange, X86VRangeRnd,
9174 SSE_ALU_F32P, HasDQI>,
9175 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar imm8 FP ops (SD = f64, SS = f32), all EVEX_CD8 tuple CD8VT1 with
// VEX_LIG. VRANGE (0x51) and VREDUCE (0x57) need DQI; VGETMANT (0x27) needs
// only AVX512.
9177 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
9178 f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
9179 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9180 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
9181 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
9182 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9184 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
9185 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
9186 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9187 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
9188 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
9189 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9191 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
9192 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
9193 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9194 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9195 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
9196 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9198 let Predicates = [HasAVX512] in {
9199 def : Pat<(v16f32 (ffloor VR512:$src)),
9200 (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9201 def : Pat<(v16f32 (fnearbyint VR512:$src)),
9202 (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9203 def : Pat<(v16f32 (fceil VR512:$src)),
9204 (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9205 def : Pat<(v16f32 (frint VR512:$src)),
9206 (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9207 def : Pat<(v16f32 (ftrunc VR512:$src)),
9208 (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
9210 def : Pat<(v8f64 (ffloor VR512:$src)),
9211 (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
9212 def : Pat<(v8f64 (fnearbyint VR512:$src)),
9213 (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9214 def : Pat<(v8f64 (fceil VR512:$src)),
9215 (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
9216 def : Pat<(v8f64 (frint VR512:$src)),
9217 (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9218 def : Pat<(v8f64 (ftrunc VR512:$src)),
9219 (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
9222 let Predicates = [HasVLX] in {
9223 def : Pat<(v4f32 (ffloor VR128X:$src)),
9224 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
9225 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9226 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9227 def : Pat<(v4f32 (fceil VR128X:$src)),
9228 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
9229 def : Pat<(v4f32 (frint VR128X:$src)),
9230 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9231 def : Pat<(v4f32 (ftrunc VR128X:$src)),
9232 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
9234 def : Pat<(v2f64 (ffloor VR128X:$src)),
9235 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
9236 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
9237 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
9238 def : Pat<(v2f64 (fceil VR128X:$src)),
9239 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
9240 def : Pat<(v2f64 (frint VR128X:$src)),
9241 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
9242 def : Pat<(v2f64 (ftrunc VR128X:$src)),
9243 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
9245 def : Pat<(v8f32 (ffloor VR256X:$src)),
9246 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
9247 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
9248 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
9249 def : Pat<(v8f32 (fceil VR256X:$src)),
9250 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
9251 def : Pat<(v8f32 (frint VR256X:$src)),
9252 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
9253 def : Pat<(v8f32 (ftrunc VR256X:$src)),
9254 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
9256 def : Pat<(v4f64 (ffloor VR256X:$src)),
9257 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
9258 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
9259 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
9260 def : Pat<(v4f64 (fceil VR256X:$src)),
9261 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
9262 def : Pat<(v4f64 (frint VR256X:$src)),
9263 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
9264 def : Pat<(v4f64 (ftrunc VR256X:$src)),
9265 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
9268 multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
9269 AVX512VLVectorVTInfo _, bits<8> opc>{
9270 let Predicates = [HasAVX512] in {
9271 defm Z : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
9274 let Predicates = [HasAVX512, HasVLX] in {
9275 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
// 128-bit-lane shuffles across 256/512-bit vectors. FP forms (VSHUFF*) use
// opcode 0x23, integer forms (VSHUFI*) use 0x43; the *64X2 variants carry
// VEX_W for 64-bit elements.
9279 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
9280 avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9281 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
9282 avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9283 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
9284 avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9285 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
9286 avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9288 let Predicates = [HasAVX512] in {
9289 // Provide fallback in case the load node that is used in the broadcast
9290 // patterns above is used by additional users, which prevents the pattern
9292 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9293 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9294 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9296 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9297 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9298 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9301 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9302 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9303 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9305 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9306 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9307 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9310 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9311 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9312 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9315 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9316 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9317 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9321 multiclass avx512_valign<string OpcodeStr, OpndItins itins,
9322 AVX512VLVectorVTInfo VTInfo_I> {
9323 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
9324 AVX512AIi8Base, EVEX_4V;
// Concatenate-and-align by element count: 32-bit-element (VALIGND) and
// 64-bit-element (VALIGNQ, VEX_W) forms of the avx512_valign multiclass.
9327 defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
9328 EVEX_CD8<32, CD8VF>;
9329 defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
9330 EVEX_CD8<64, CD8VF>, VEX_W;
9332 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
9333 avx512vl_i8_info, avx512vl_i8_info>,
9336 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
9338 def ValignqImm32XForm : SDNodeXForm<imm, [{
9339 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
9341 def ValignqImm8XForm : SDNodeXForm<imm, [{
9342 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
9344 def ValigndImm8XForm : SDNodeXForm<imm, [{
9345 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
9348 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
9349 X86VectorVTInfo From, X86VectorVTInfo To,
9350 SDNodeXForm ImmXForm> {
9351 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9353 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9356 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
9357 To.RC:$src1, To.RC:$src2,
9358 (ImmXForm imm:$src3))>;
9360 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9362 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9365 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
9366 To.RC:$src1, To.RC:$src2,
9367 (ImmXForm imm:$src3))>;
9369 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9371 (From.VT (OpNode From.RC:$src1,
9372 (bitconvert (To.LdFrag addr:$src2)),
9375 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
9376 To.RC:$src1, addr:$src2,
9377 (ImmXForm imm:$src3))>;
9379 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9381 (From.VT (OpNode From.RC:$src1,
9382 (bitconvert (To.LdFrag addr:$src2)),
9385 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
9386 To.RC:$src1, addr:$src2,
9387 (ImmXForm imm:$src3))>;
9390 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
9391 X86VectorVTInfo From,
9393 SDNodeXForm ImmXForm> :
9394 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
9395 def : Pat<(From.VT (OpNode From.RC:$src1,
9396 (bitconvert (To.VT (X86VBroadcast
9397 (To.ScalarLdFrag addr:$src2)))),
9399 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
9400 (ImmXForm imm:$src3))>;
9402 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9404 (From.VT (OpNode From.RC:$src1,
9406 (To.VT (X86VBroadcast
9407 (To.ScalarLdFrag addr:$src2)))),
9410 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
9411 To.RC:$src1, addr:$src2,
9412 (ImmXForm imm:$src3))>;
9414 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9416 (From.VT (OpNode From.RC:$src1,
9418 (To.VT (X86VBroadcast
9419 (To.ScalarLdFrag addr:$src2)))),
9422 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
9423 To.RC:$src1, addr:$src2,
9424 (ImmXForm imm:$src3))>;
9427 let Predicates = [HasAVX512] in {
9428 // For 512-bit we lower to the widest element type we can. So we only need
9429 // to handle converting valignq to valignd.
9430 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
9431 v16i32_info, ValignqImm32XForm>;
9434 let Predicates = [HasVLX] in {
9435 // For 128-bit we lower to the widest element type we can. So we only need
9436 // to handle converting valignq to valignd.
9437 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
9438 v4i32x_info, ValignqImm32XForm>;
9439 // For 256-bit we lower to the widest element type we can. So we only need
9440 // to handle converting valignq to valignd.
9441 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
9442 v8i32x_info, ValignqImm32XForm>;
9445 let Predicates = [HasVLX, HasBWI] in {
9446 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
9447 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
9448 v16i8x_info, ValignqImm8XForm>;
9449 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
9450 v16i8x_info, ValigndImm8XForm>;
9453 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
9454 SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
9457 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9458 OpndItins itins, X86VectorVTInfo _> {
9459 let ExeDomain = _.ExeDomain in {
9460 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9461 (ins _.RC:$src1), OpcodeStr,
9463 (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
9464 Sched<[itins.Sched]>;
9466 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9467 (ins _.MemOp:$src1), OpcodeStr,
9469 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
9470 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
9471 Sched<[itins.Sched.Folded]>;
9475 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9476 OpndItins itins, X86VectorVTInfo _> :
9477 avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
9478 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9479 (ins _.ScalarMemOp:$src1), OpcodeStr,
9480 "${src1}"##_.BroadcastStr,
9481 "${src1}"##_.BroadcastStr,
9482 (_.VT (OpNode (X86VBroadcast
9483 (_.ScalarLdFrag addr:$src1)))), itins.rm>,
9484 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
9485 Sched<[itins.Sched.Folded]>;
9488 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9489 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9491 let Predicates = [prd] in
9492 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9495 let Predicates = [prd, HasVLX] in {
9496 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9498 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9503 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9504 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9506 let Predicates = [prd] in
9507 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9510 let Predicates = [prd, HasVLX] in {
9511 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9513 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
9518 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
9519 SDNode OpNode, OpndItins itins, Predicate prd> {
9520 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
9521 avx512vl_i64_info, prd>, VEX_W;
9522 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
9523 avx512vl_i32_info, prd>;
9526 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
9527 SDNode OpNode, OpndItins itins, Predicate prd> {
9528 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
9529 avx512vl_i16_info, prd>, VEX_WIG;
9530 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
9531 avx512vl_i8_info, prd>, VEX_WIG;
9534 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9535 bits<8> opc_d, bits<8> opc_q,
9536 string OpcodeStr, SDNode OpNode,
9538 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
9540 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
// VPABS for all four element widths (b/w/d/q, opcodes 0x1C-0x1F), lowering
// the generic 'abs' node.
9544 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
9546 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
9547 let Predicates = [HasAVX512, NoVLX] in {
9548 def : Pat<(v4i64 (abs VR256X:$src)),
9551 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9553 def : Pat<(v2i64 (abs VR128X:$src)),
9556 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9560 // Use 512bit version to implement 128/256 bit.
9561 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
9562 AVX512VLVectorVTInfo _, Predicate prd> {
9563 let Predicates = [prd, NoVLX] in {
9564 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9566 (!cast<Instruction>(InstrStr # "Zrr")
9567 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9569 _.info256.SubRegIdx)),
9570 _.info256.SubRegIdx)>;
9572 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9574 (!cast<Instruction>(InstrStr # "Zrr")
9575 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9577 _.info128.SubRegIdx)),
9578 _.info128.SubRegIdx)>;
// CDI leading-zero count (lowers ctlz) and conflict detection, d/q element
// widths only.
9582 // FIXME: Is there a better scheduler itinerary for VPLZCNT?
9583 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
9584 SSE_INTALU_ITINS_P, HasCDI>;
9586 // FIXME: Is there a better scheduler itinerary for VPCONFLICT?
9587 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
9588 SSE_INTALU_ITINS_P, HasCDI>;
9590 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9591 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
9592 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
9594 //===---------------------------------------------------------------------===//
9595 // Counts number of ones - VPOPCNTD and VPOPCNTQ
9596 //===---------------------------------------------------------------------===//
// VPOPCNTD/VPOPCNTQ (opcode 0x55) lower the generic ctpop node; the
// anonymous defms widen 128/256-bit ops to the 512-bit form when VLX is
// unavailable.
9598 // FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
9599 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9600 SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
9602 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9603 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
9605 //===---------------------------------------------------------------------===//
9606 // Replicate Single FP - MOVSHDUP and MOVSLDUP
9607 //===---------------------------------------------------------------------===//
9608 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9610 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9611 avx512vl_f32_info, HasAVX512>, XS;
// Single-FP replicate: duplicate odd (MOVSHDUP, 0x16) or even (MOVSLDUP,
// 0x12) f32 elements.
9614 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9615 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
9617 //===----------------------------------------------------------------------===//
9618 // AVX-512 - MOVDDUP
9619 //===----------------------------------------------------------------------===//
9621 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
9622 OpndItins itins, X86VectorVTInfo _> {
9623 let ExeDomain = _.ExeDomain in {
9624 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9625 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9626 (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9627 Sched<[itins.Sched]>;
9628 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9629 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9630 (_.VT (OpNode (_.VT (scalar_to_vector
9631 (_.ScalarLdFrag addr:$src))))),
9632 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9633 Sched<[itins.Sched.Folded]>;
9637 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9638 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
9640 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info512>, EVEX_V512;
9642 let Predicates = [HasAVX512, HasVLX] in {
9643 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info256>,
9645 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
9650 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9652 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
9653 avx512vl_f64_info>, XD, VEX_W;
// Double-FP replicate: duplicate the low f64 of each 128-bit lane.
9656 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
9658 let Predicates = [HasVLX] in {
9659 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
9660 (VMOVDDUPZ128rm addr:$src)>;
9661 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9662 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9663 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9664 (VMOVDDUPZ128rm addr:$src)>;
9666 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9667 (v2f64 VR128X:$src0)),
9668 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9669 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9670 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9671 (bitconvert (v4i32 immAllZerosV))),
9672 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9674 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9675 (v2f64 VR128X:$src0)),
9676 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9677 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9678 (bitconvert (v4i32 immAllZerosV))),
9679 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9681 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9682 (v2f64 VR128X:$src0)),
9683 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9684 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9685 (bitconvert (v4i32 immAllZerosV))),
9686 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9689 //===----------------------------------------------------------------------===//
9690 // AVX-512 - Unpack Instructions
9691 //===----------------------------------------------------------------------===//
9692 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9694 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
// Integer unpack/interleave. Byte and word forms require BWI; dword and
// qword forms require only base AVX512.
9697 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9698 SSE_INTALU_ITINS_P, HasBWI>;
9699 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9700 SSE_INTALU_ITINS_P, HasBWI>;
9701 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9702 SSE_INTALU_ITINS_P, HasBWI>;
9703 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9704 SSE_INTALU_ITINS_P, HasBWI>;
9706 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9707 SSE_INTALU_ITINS_P, HasAVX512>;
9708 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9709 SSE_INTALU_ITINS_P, HasAVX512>;
9710 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9711 SSE_INTALU_ITINS_P, HasAVX512>;
9712 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9713 SSE_INTALU_ITINS_P, HasAVX512>;
9715 //===----------------------------------------------------------------------===//
9716 // AVX-512 - Extract & Insert Integer Instructions
9717 //===----------------------------------------------------------------------===//
9719 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9720 X86VectorVTInfo _> {
9721 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9722 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9723 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9724 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9726 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>;
9729 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9730 let Predicates = [HasBWI] in {
9731 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9732 (ins _.RC:$src1, u8imm:$src2),
9733 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9734 [(set GR32orGR64:$dst,
9735 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
9736 EVEX, TAPD, Sched<[WriteShuffle]>;
9738 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9742 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9743 let Predicates = [HasBWI] in {
9744 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9745 (ins _.RC:$src1, u8imm:$src2),
9746 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9747 [(set GR32orGR64:$dst,
9748 (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9749 IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
9751 let hasSideEffects = 0 in
9752 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9753 (ins _.RC:$src1, u8imm:$src2),
9754 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9755 IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9756 Sched<[WriteShuffle]>;
9758 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9762 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9763 RegisterClass GRC> {
9764 let Predicates = [HasDQI] in {
9765 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9766 (ins _.RC:$src1, u8imm:$src2),
9767 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9769 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
9770 EVEX, TAPD, Sched<[WriteShuffle]>;
9772 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9773 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9774 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9775 [(store (extractelt (_.VT _.RC:$src1),
9776 imm:$src2),addr:$dst)]>,
9777 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
9778 Sched<[WriteShuffleLd]>;
// Element extract to GPR. b/w forms ignore VEX.W (VEX_WIG); d/q forms go
// through the DQI-gated dq multiclass, with VEX_W selecting the q form.
9782 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9783 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
9784 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9785 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9787 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9788 X86VectorVTInfo _, PatFrag LdFrag> {
9789 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9790 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9791 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9793 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
9794 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
9797 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9798 X86VectorVTInfo _, PatFrag LdFrag> {
9799 let Predicates = [HasBWI] in {
9800 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9801 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
9802 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9804 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
9805 Sched<[WriteShuffle]>;
9807 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
9811 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
9812 X86VectorVTInfo _, RegisterClass GRC> {
9813 let Predicates = [HasDQI] in {
9814 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9815 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
9816 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9818 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
9819 EVEX_4V, TAPD, Sched<[WriteShuffle]>;
9821 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
9822 _.ScalarLdFrag>, TAPD;
// Element insert from GPR/memory. Note VPINSRD and VPINSRQ intentionally
// share opcode 0x22 -- they are distinguished by VEX_W, matching the x86
// encoding.
9826 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
9827 extloadi8>, TAPD, VEX_WIG;
9828 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
9829 extloadi16>, PD, VEX_WIG;
9830 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
9831 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
9833 //===----------------------------------------------------------------------===//
9834 // VSHUFPS - VSHUFPD Operations
9835 //===----------------------------------------------------------------------===//
9837 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
9838 AVX512VLVectorVTInfo VTInfo_FP>{
9839 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
9840 SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
9841 AVX512AIi8Base, EVEX_4V;
// VSHUFPS/VSHUFPD (opcode 0xC6): each takes an integer VT info (for masking)
// and an FP VT info; PS/PD prefixes and VEX_W select the element width.
9844 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
9845 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
9847 //===----------------------------------------------------------------------===//
9848 // AVX-512 - Byte shift Left/Right
9849 //===----------------------------------------------------------------------===//
// Itinerary (rr/rm) shared by the byte-shift instructions below
// (VPSLLDQ/VPSRLDQ); scheduled as a vector shift.
9851 let Sched = WriteVecShift in
9852 def AVX512_BYTESHIFT : OpndItins<
9853 IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
// One vector length of a byte-granularity whole-register shift by an
// 8-bit immediate, in register (rr) and memory (rm) forms.
9856 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
9857 Format MRMm, string OpcodeStr,
9858 OpndItins itins, X86VectorVTInfo _>{
9859 def rr : AVX512<opc, MRMr,
9860 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
9861 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9862 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
9863 itins.rr>, Sched<[itins.Sched]>;
// Memory form folds the vector load into the shift.
9864 def rm : AVX512<opc, MRMm,
9865 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
9866 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9867 [(set _.RC:$dst,(_.VT (OpNode
9868 (_.VT (bitconvert (_.LdFrag addr:$src1))),
9869 (i8 imm:$src2))))], itins.rm>,
9870 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates the byte-shift at 512 bits under `prd`, and at 256/128
// bits when VLX is additionally available.
9873 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
9874 Format MRMm, string OpcodeStr,
9875 OpndItins itins, Predicate prd>{
9876 let Predicates = [prd] in
9877 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9878 OpcodeStr, itins, v64i8_info>, EVEX_V512;
9879 let Predicates = [prd, HasVLX] in {
9880 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9881 OpcodeStr, itins, v32i8x_info>, EVEX_V256;
9882 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9883 OpcodeStr, itins, v16i8x_info>, EVEX_V128;
// Byte shift left/right; both share opcode 0x73, distinguished by the
// ModRM reg field (MRM7 = shift left, MRM3 = shift right). Requires BWI.
9886 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
9887 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9889 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
9890 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
// One vector length of PSADBW-style ops: source and destination use
// different VT infos (byte sources accumulate into wider elements).
9894 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
9895 string OpcodeStr, OpndItins itins,
9896 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
9897 def rr : AVX512BI<opc, MRMSrcReg,
9898 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
9899 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9900 [(set _dst.RC:$dst,(_dst.VT
9901 (OpNode (_src.VT _src.RC:$src1),
9902 (_src.VT _src.RC:$src2))))], itins.rr>,
9903 Sched<[itins.Sched]>;
// Memory form folds the second source operand's load.
9904 def rm : AVX512BI<opc, MRMSrcMem,
9905 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
9906 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9907 [(set _dst.RC:$dst,(_dst.VT
9908 (OpNode (_src.VT _src.RC:$src1),
9909 (_src.VT (bitconvert
9910 (_src.LdFrag addr:$src2))))))], itins.rm>,
9911 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All vector lengths of PSADBW: i8 sources, i64 destinations
// (v64i8->v8i64 at 512 bits; 256/128-bit forms need VLX).
9914 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
9915 string OpcodeStr, OpndItins itins,
9917 let Predicates = [prd] in
9918 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
9919 v64i8_info>, EVEX_V512;
9920 let Predicates = [prd, HasVLX] in {
9921 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
9922 v32i8x_info>, EVEX_V256;
9923 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
9924 v16i8x_info>, EVEX_V128;
// Sum of absolute differences of packed bytes; requires BWI.
9928 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
9929 SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
9931 // Transforms to swizzle an immediate to enable better matching when
9932 // memory operand isn't in the right place.
// The VPTERNLOG imm8 is a truth table indexed by the three source
// operands: operand 0 contributes weight 4 to the index, operand 1
// weight 2, operand 2 weight 1. Reordering operands therefore permutes
// the table's bit positions, which these transforms compute.
9933 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
9934 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
9935 uint8_t Imm = N->getZExtValue();
9936 // Swap bits 1/4 and 3/6.
9937 uint8_t NewImm = Imm & 0xa5;
9938 if (Imm & 0x02) NewImm |= 0x10;
9939 if (Imm & 0x10) NewImm |= 0x02;
9940 if (Imm & 0x08) NewImm |= 0x40;
9941 if (Imm & 0x40) NewImm |= 0x08;
9942 return getI8Imm(NewImm, SDLoc(N));
9944 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
// NOTE(review): the original comment said "operand 1 and operand 2",
// but the bit swaps below exchange the weight-4 and weight-2 index
// bits, i.e. operands 0 and 1 (0-indexed, matching VPTERNLOG321_imm8).
9945 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
9946 uint8_t Imm = N->getZExtValue();
9947 // Swap bits 2/4 and 3/5.
9948 uint8_t NewImm = Imm & 0xc3;
9949 if (Imm & 0x04) NewImm |= 0x10;
9950 if (Imm & 0x10) NewImm |= 0x04;
9951 if (Imm & 0x08) NewImm |= 0x20;
9952 if (Imm & 0x20) NewImm |= 0x08;
9953 return getI8Imm(NewImm, SDLoc(N));
9955 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
// Swapping the last two operands exchanges the weight-2 and weight-1
// index bits: 1<->2 and 5<->6.
9956 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
9957 uint8_t Imm = N->getZExtValue();
9958 // Swap bits 1/2 and 5/6.
9959 uint8_t NewImm = Imm & 0x99;
9960 if (Imm & 0x02) NewImm |= 0x04;
9961 if (Imm & 0x04) NewImm |= 0x02;
9962 if (Imm & 0x20) NewImm |= 0x40;
9963 if (Imm & 0x40) NewImm |= 0x20;
9964 return getI8Imm(NewImm, SDLoc(N));
9966 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
// NOTE(review): the bit moves below send the weight-4 bit to weight-1
// (4->1), i.e. they rotate operand 0 (0-indexed) to the last position.
9967 // Convert a VPTERNLOG immediate by moving operand 0 to the end.
9968 uint8_t Imm = N->getZExtValue();
9969 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
9970 uint8_t NewImm = Imm & 0x81;
9971 if (Imm & 0x02) NewImm |= 0x04;
9972 if (Imm & 0x04) NewImm |= 0x10;
9973 if (Imm & 0x08) NewImm |= 0x40;
9974 if (Imm & 0x10) NewImm |= 0x02;
9975 if (Imm & 0x20) NewImm |= 0x08;
9976 if (Imm & 0x40) NewImm |= 0x20;
9977 return getI8Imm(NewImm, SDLoc(N));
9979 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
// Inverse rotation of VPTERNLOG231_imm8: operand 2 (weight 1) becomes
// the first operand (weight 4), hence the 1->4 bit move below.
9980 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
9981 uint8_t Imm = N->getZExtValue();
9982 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
9983 uint8_t NewImm = Imm & 0x81;
9984 if (Imm & 0x02) NewImm |= 0x10;
9985 if (Imm & 0x04) NewImm |= 0x02;
9986 if (Imm & 0x08) NewImm |= 0x20;
9987 if (Imm & 0x10) NewImm |= 0x04;
9988 if (Imm & 0x20) NewImm |= 0x40;
9989 if (Imm & 0x40) NewImm |= 0x08;
9990 return getI8Imm(NewImm, SDLoc(N));
// One vector length of VPTERNLOG: rri/rmi/rmbi forms (register, load,
// broadcast-load), tied to $src1, followed by a large set of extra
// patterns that re-commute the operands so that loads/broadcasts and
// mask passthru values can sit in any source position. Each pattern
// pairs an operand permutation with the matching VPTERNLOG*_imm8
// immediate transform defined above.
9993 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
9994 OpndItins itins, X86VectorVTInfo _>{
9995 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9996 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9997 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
9998 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9999 (OpNode (_.VT _.RC:$src1),
10002 (i8 imm:$src4)), itins.rr, 1, 1>,
10003 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
10004 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10005 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
10006 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
10007 (OpNode (_.VT _.RC:$src1),
10009 (_.VT (bitconvert (_.LdFrag addr:$src3))),
10010 (i8 imm:$src4)), itins.rm, 1, 0>,
10011 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10012 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10013 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10014 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
10015 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10016 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10017 (OpNode (_.VT _.RC:$src1),
10019 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10020 (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
10021 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
10022 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10023 }// Constraints = "$src1 = $dst"
10025 // Additional patterns for matching passthru operand in other positions.
10026 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10027 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10029 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10030 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10031 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10032 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
10034 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
10035 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10037 // Additional patterns for matching loads in other positions.
10038 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
10039 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10040 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10041 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10042 def : Pat<(_.VT (OpNode _.RC:$src1,
10043 (bitconvert (_.LdFrag addr:$src3)),
10044 _.RC:$src2, (i8 imm:$src4))),
10045 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
10046 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10048 // Additional patterns for matching zero masking with loads in other
10050 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10051 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10052 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10054 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10055 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10056 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10057 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10058 _.RC:$src2, (i8 imm:$src4)),
10060 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
10061 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10063 // Additional patterns for matching masked loads with different
10065 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10066 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
10067 _.RC:$src2, (i8 imm:$src4)),
10069 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10070 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10071 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10072 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10073 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10075 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10076 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10077 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10078 (OpNode _.RC:$src2, _.RC:$src1,
10079 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
10081 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10082 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10083 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10084 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
10085 _.RC:$src1, (i8 imm:$src4)),
10087 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10088 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10089 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10090 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10091 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10093 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10094 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
10096 // Additional patterns for matching broadcasts in other positions.
10097 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10098 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10099 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10100 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10101 def : Pat<(_.VT (OpNode _.RC:$src1,
10102 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10103 _.RC:$src2, (i8 imm:$src4))),
10104 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10105 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10107 // Additional patterns for matching zero masking with broadcasts in other
10109 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10110 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10111 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10113 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10114 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10115 (VPTERNLOG321_imm8 imm:$src4))>;
10116 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10117 (OpNode _.RC:$src1,
10118 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10119 _.RC:$src2, (i8 imm:$src4)),
10121 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10122 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10123 (VPTERNLOG132_imm8 imm:$src4))>;
10125 // Additional patterns for matching masked broadcasts with different
10127 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10128 (OpNode _.RC:$src1,
10129 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10130 _.RC:$src2, (i8 imm:$src4)),
10132 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10133 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10134 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10135 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10136 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10138 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10139 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10140 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10141 (OpNode _.RC:$src2, _.RC:$src1,
10142 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10143 (i8 imm:$src4)), _.RC:$src1)),
10144 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10145 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10146 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10147 (OpNode _.RC:$src2,
10148 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10149 _.RC:$src1, (i8 imm:$src4)),
10151 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10152 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10153 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10154 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10155 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10157 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10158 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// VPTERNLOG at all vector lengths: 512-bit under AVX512F, 128/256-bit
// additionally require VLX.
10161 multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
10162 AVX512VLVectorVTInfo _> {
10163 let Predicates = [HasAVX512] in
10164 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
10165 let Predicates = [HasAVX512, HasVLX] in {
10166 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
10167 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
// Dword and qword element forms; the qword form carries VEX_W.
10171 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
10172 avx512vl_i32_info>;
10173 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
10174 avx512vl_i64_info>, VEX_W;
10176 //===----------------------------------------------------------------------===//
10177 // AVX-512 - FixupImm
10178 //===----------------------------------------------------------------------===//
// One vector length of VFIXUPIMM (packed): rri/rmi/rmbi forms, tied to
// $src1, all using the current rounding mode (FROUND_CURRENT).
10180 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
10181 OpndItins itins, X86VectorVTInfo _>{
10182 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10183 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10184 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10185 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10186 (OpNode (_.VT _.RC:$src1),
10188 (_.IntVT _.RC:$src3),
10190 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10191 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10192 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
10193 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10194 (OpNode (_.VT _.RC:$src1),
10196 (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
10198 (i32 FROUND_CURRENT)), itins.rm>,
10199 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10200 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10201 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10202 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10203 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10204 (OpNode (_.VT _.RC:$src1),
10206 (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10208 (i32 FROUND_CURRENT)), itins.rm>,
10209 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10210 } // Constraints = "$src1 = $dst"
// SAE (suppress-all-exceptions) variant of packed VFIXUPIMM: register
// form only, with {sae} in the asm string and FROUND_NO_EXC.
10213 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
10214 SDNode OpNode, OpndItins itins,
10215 X86VectorVTInfo _>{
10216 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10217 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10218 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10219 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10220 "$src2, $src3, {sae}, $src4",
10221 (OpNode (_.VT _.RC:$src1),
10223 (_.IntVT _.RC:$src3),
10225 (i32 FROUND_NO_EXC)), itins.rr>,
10226 EVEX_B, Sched<[itins.Sched]>;
// Scalar VFIXUPIMM (SS/SD): _src3VT carries the integer vector type of
// the third (table) operand; provides rri, rrib ({sae}) and rmi forms.
10230 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
10231 OpndItins itins, X86VectorVTInfo _,
10232 X86VectorVTInfo _src3VT> {
10233 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10234 ExeDomain = _.ExeDomain in {
10235 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10236 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10237 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10238 (OpNode (_.VT _.RC:$src1),
10240 (_src3VT.VT _src3VT.RC:$src3),
10242 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10243 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10244 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10245 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10246 "$src2, $src3, {sae}, $src4",
10247 (OpNode (_.VT _.RC:$src1),
10249 (_src3VT.VT _src3VT.RC:$src3),
10251 (i32 FROUND_NO_EXC)), itins.rm>,
10252 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Memory form: the table operand is a scalar load widened with
// scalar_to_vector.
10253 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10254 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10255 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10256 (OpNode (_.VT _.RC:$src1),
10258 (_src3VT.VT (scalar_to_vector
10259 (_src3VT.ScalarLdFrag addr:$src3))),
10261 (i32 FROUND_CURRENT)), itins.rm>,
10262 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All vector lengths of packed VFIXUPIMM; the 512-bit form also gets
// the SAE variant, and 128/256-bit forms need VLX.
10266 multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
10267 let Predicates = [HasAVX512] in
10268 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10270 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
10271 _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
10272 let Predicates = [HasAVX512, HasVLX] in {
10273 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10274 _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
10275 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10276 _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
// Scalar (SS/SD, opcode 0x55) and packed (PS/PD, opcode 0x54)
// instantiations; 64-bit element forms carry VEX_W.
10280 defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10281 SSE_ALU_F32S, f32x_info, v4i32x_info>,
10282 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10283 defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10284 SSE_ALU_F64S, f64x_info, v2i64x_info>,
10285 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10286 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
10287 EVEX_CD8<32, CD8VF>;
10288 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
10289 EVEX_CD8<64, CD8VF>, VEX_W;
10293 // Patterns used to select SSE scalar fp arithmetic instructions from
10296 // (1) a scalar fp operation followed by a blend
10298 // The effect is that the backend no longer emits unnecessary vector
10299 // insert instructions immediately after SSE scalar fp instructions
10300 // like addss or mulss.
10302 // For example, given the following code:
10303 // __m128 foo(__m128 A, __m128 B) {
10308 // Previously we generated:
10309 // addss %xmm0, %xmm1
10310 // movss %xmm1, %xmm0
10312 // We now generate:
10313 // addss %xmm1, %xmm0
10315 // (2) a vector packed single/double fp operation followed by a vector insert
10317 // The effect is that the backend converts the packed fp instruction
10318 // followed by a vector insert into a single SSE scalar fp instruction.
10320 // For example, given the following code:
10321 // __m128 foo(__m128 A, __m128 B) {
10322 // __m128 C = A + B;
10323 // return (__m128) {C[0], A[1], A[2], A[3]};
10326 // Previously we generated:
10327 // addps %xmm0, %xmm1
10328 // movss %xmm1, %xmm0
10330 // We now generate:
10331 // addss %xmm1, %xmm0
10333 // TODO: Some canonicalization in lowering would simplify the number of
10334 // patterns we have to try to match.
// Selects the EVEX scalar-FP instruction (e.g. VADDSSZrr_Int) for
// f32 scalar math expressed as extract/op/insert-via-movss DAGs.
10335 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10336 let Predicates = [HasAVX512] in {
10337 // extracted scalar math op with insert via movss
10338 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10339 (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10341 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10342 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10344 // vector math op with insert via movss
10345 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10346 (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
10347 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10349 // extracted masked scalar math op with insert via movss
10350 def : Pat<(X86Movss (v4f32 VR128X:$src1),
10352 (X86selects VK1WM:$mask,
10353 (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10356 (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10357 VK1WM:$mask, v4f32:$src1,
10358 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
// Apply the f32 scalar-math patterns to the four basic FP ops.
10362 defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10363 defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10364 defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10365 defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
// f64 analogue of AVX512_scalar_math_f32_patterns: selects the EVEX
// SD instructions for extract/op/insert-via-movsd DAGs.
10367 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10368 let Predicates = [HasAVX512] in {
10369 // extracted scalar math op with insert via movsd
10370 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10371 (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10373 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10374 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10376 // vector math op with insert via movsd
10377 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10378 (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
10379 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
// (Comment fixed: this is the movsd form, not movss.)
10381 // extracted masked scalar math op with insert via movsd
10382 def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10384 (X86selects VK1WM:$mask,
10385 (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10388 (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10389 VK1WM:$mask, v2f64:$src1,
10390 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
// Apply the f64 scalar-math patterns to the four basic FP ops.
10394 defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10395 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10396 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10397 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
10399 //===----------------------------------------------------------------------===//
10400 // AES instructions
10401 //===----------------------------------------------------------------------===//
// EVEX-encoded AES ops at all vector lengths, built on the legacy
// AESI_binop_rm_int multiclass; intrinsic names are derived from
// IntPrefix ("" / "_256" / "_512" suffixes). 128/256-bit forms need
// VLX+VAES, the 512-bit form AVX512F+VAES.
10403 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
10404 let Predicates = [HasVLX, HasVAES] in {
10405 defm Z128 : AESI_binop_rm_int<Op, OpStr,
10406 !cast<Intrinsic>(IntPrefix),
10407 loadv2i64, 0, VR128X, i128mem>,
10408 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
10409 defm Z256 : AESI_binop_rm_int<Op, OpStr,
10410 !cast<Intrinsic>(IntPrefix##"_256"),
10411 loadv4i64, 0, VR256X, i256mem>,
10412 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
10414 let Predicates = [HasAVX512, HasVAES] in
10415 defm Z : AESI_binop_rm_int<Op, OpStr,
10416 !cast<Intrinsic>(IntPrefix##"_512"),
10417 loadv8i64, 0, VR512, i512mem>,
10418 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four AES round operations, opcodes 0xDC-0xDF.
10421 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
10422 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
10423 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
10424 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10426 //===----------------------------------------------------------------------===//
10427 // PCLMUL instructions - Carry less multiplication
10428 //===----------------------------------------------------------------------===//
// EVEX carry-less multiply: 512-bit under AVX512F+VPCLMULQDQ,
// 128/256-bit under VLX+VPCLMULQDQ, plus pclmul{lqlq,...}dq aliases.
10430 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
10431 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
10432 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
10434 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
10435 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
10436 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
10438 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
10439 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
10440 EVEX_CD8<64, CD8VF>, VEX_WIG;
10444 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
10445 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
10446 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10448 //===----------------------------------------------------------------------===//
10450 //===----------------------------------------------------------------------===//
// One vector length of a VBMI2 variable funnel shift (vpshldv/vpshrdv):
// register and memory forms, tied to $src1.
10452 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
10453 OpndItins itins, X86VectorVTInfo VTI> {
10454 let Constraints = "$src1 = $dst",
10455 ExeDomain = VTI.ExeDomain in {
10456 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10457 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10458 "$src3, $src2", "$src2, $src3",
10459 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
10460 itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
10461 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10462 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10463 "$src3, $src2", "$src2, $src3",
10464 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10465 (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
10466 itins.rm>, AVX512FMA3Base,
10467 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Extends VBMI2_shift_var_rm with a broadcast-load (mb) form; only the
// dword/qword element sizes instantiate this (word has no broadcast).
10471 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10472 OpndItins itins, X86VectorVTInfo VTI>
10473 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
10474 let Constraints = "$src1 = $dst",
10475 ExeDomain = VTI.ExeDomain in
10476 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10477 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
10478 "${src3}"##VTI.BroadcastStr##", $src2",
10479 "$src2, ${src3}"##VTI.BroadcastStr,
10480 (OpNode VTI.RC:$src1, VTI.RC:$src2,
10481 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
10482 itins.rm>, AVX512FMA3Base, EVEX_B,
10483 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All vector lengths of the non-broadcast variable shift; VLX gates
// the 128/256-bit forms.
10486 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
10487 OpndItins itins, AVX512VLVectorVTInfo VTI> {
10488 let Predicates = [HasVBMI2] in
10489 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10490 let Predicates = [HasVBMI2, HasVLX] in {
10491 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10492 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
// All vector lengths of the broadcast-capable variable shift.
10496 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
10497 OpndItins itins, AVX512VLVectorVTInfo VTI> {
10498 let Predicates = [HasVBMI2] in
10499 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10500 let Predicates = [HasVBMI2, HasVLX] in {
10501 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10502 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
// Word/dword/qword element widths of a variable funnel shift; the word
// form takes a distinct opcode and has no broadcast (rm_common).
10505 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
10506 SDNode OpNode, OpndItins itins> {
10507 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
10508 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
10509 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
10510 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10511 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
10512 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate-count funnel shifts (vpshld/vpshrd) at w/d/q widths, built
// on the generic 3-operand imm8 multiclasses.
10515 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
10516 SDNode OpNode, OpndItins itins> {
10517 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
10518 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
10519 VEX_W, EVEX_CD8<16, CD8VF>;
10520 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
10521 OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10522 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
10523 itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VBMI2 funnel shifts (variable and immediate forms) and the
// byte/word compress/expand instructions.
10527 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
10528 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
10529 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
10530 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
10533 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
10534 avx512vl_i8_info, HasVBMI2>, EVEX;
10535 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
10536 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10538 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
10539 avx512vl_i8_info, HasVBMI2>, EVEX;
10540 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
10541 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10543 //===----------------------------------------------------------------------===//
10545 //===----------------------------------------------------------------------===//
// One vector length of a VNNI dot-product accumulate: register,
// memory, and dword-broadcast forms, tied to $src1 (the accumulator).
10547 let Constraints = "$src1 = $dst" in
10548 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10549 OpndItins itins, X86VectorVTInfo VTI> {
10550 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10551 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10552 "$src3, $src2", "$src2, $src3",
10553 (VTI.VT (OpNode VTI.RC:$src1,
10554 VTI.RC:$src2, VTI.RC:$src3)),
10555 itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
10556 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10557 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10558 "$src3, $src2", "$src2, $src3",
10559 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10560 (VTI.VT (bitconvert
10561 (VTI.LdFrag addr:$src3))))),
10562 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
10563 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10564 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10565 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
10566 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
10567 "$src2, ${src3}"##VTI.BroadcastStr,
10568 (OpNode VTI.RC:$src1, VTI.RC:$src2,
10569 (VTI.VT (X86VBroadcast
10570 (VTI.ScalarLdFrag addr:$src3)))),
10571 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
10572 T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// All vector lengths of a VNNI op over i32 element infos; VLX gates
// the 128/256-bit forms.
10575 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
10576 let Predicates = [HasVNNI] in
10577 defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
10578 let Predicates = [HasVNNI, HasVLX] in {
10579 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
10580 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
// The four VNNI dot-product instructions, opcodes 0x50-0x53.
10584 // FIXME: Is there a better scheduler itinerary for VPDP?
10585 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
10586 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
10587 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
10588 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
10590 //===----------------------------------------------------------------------===//
10592 //===----------------------------------------------------------------------===//
// BITALG byte/word population count, plus lowering patterns for
// narrower vectors without VLX.
10594 // FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
10595 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
10596 avx512vl_i8_info, HasBITALG>;
10597 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
10598 avx512vl_i16_info, HasBITALG>, VEX_W;
10600 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
10601 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// One vector length of VPSHUFBITQMB: a bit-gather compare producing a
// mask register result (KRC), in register and memory forms.
10603 multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
10604 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
10605 (ins VTI.RC:$src1, VTI.RC:$src2),
10607 "$src2, $src1", "$src1, $src2",
10608 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10609 (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
10610 Sched<[itins.Sched]>;
10611 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
10612 (ins VTI.RC:$src1, VTI.MemOp:$src2),
10614 "$src2, $src1", "$src1, $src2",
10615 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10616 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
10617 itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
10618 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiate VPSHUFBITQMB at all three EVEX vector lengths: 512-bit under
// HasBITALG alone, 256/128-bit additionally under HasVLX.
10621 multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
10622 let Predicates = [HasBITALG] in
10623 defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
10624 let Predicates = [HasBITALG, HasVLX] in {
10625 defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
10626 defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
// VPSHUFBITQMB over byte-element vectors; borrows the SSE integer-multiply
// itinerary for now (see FIXME).
10630 // FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
10631 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
10633 //===----------------------------------------------------------------------===//
10634 // GFNI
10635 //===----------------------------------------------------------------------===//
// EVEX-encoded VGF2P8MULB at all three vector lengths over byte elements.
// The 512-bit form requires HasGFNI+HasAVX512+HasBWI; the 256/128-bit forms
// swap HasAVX512 for HasVLX. The trailing 1 marks the operation commutable
// for load folding.
10637 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
10638 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10639 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
10640 SSE_INTALU_ITINS_P, 1>, EVEX_V512;
10641 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10642 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
10643 SSE_INTALU_ITINS_P, 1>, EVEX_V256;
10644 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
10645 SSE_INTALU_ITINS_P, 1>, EVEX_V128;
// Galois-field byte multiply, opcode 0xCF in the T8PD map, byte-granular
// compressed displacement.
10649 defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
10650 EVEX_CD8<8, CD8VF>, T8PD;
// One vector width of a GF2P8AFFINE* instruction: inherits the reg/reg and
// reg/mem imm8 forms from avx512_3Op_rm_imm8 and adds the broadcast (rmbi)
// form. The broadcast element is a qword (loadi64) described by BcstVTI,
// bitcast back to the byte-vector type VTI for the OpNode pattern.
10652 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
10653 OpndItins itins, X86VectorVTInfo VTI,
10654 X86VectorVTInfo BcstVTI>
10655 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
10656 let ExeDomain = VTI.ExeDomain in
10657 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10658 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
10659 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
10660 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
10661 (OpNode (VTI.VT VTI.RC:$src1),
10662 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
10663 (i8 imm:$src3)), itins.rm>, EVEX_B,
10664 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiate a GF2P8AFFINE* instruction at all three vector lengths. Byte
// vectors carry the data; the paired i64 VT infos describe the qword
// broadcast operand for each width.
10667 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
10669 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10670 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info,
10671 v8i64_info>, EVEX_V512;
10672 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10673 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info,
10674 v4i64x_info>, EVEX_V256;
10675 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info,
10676 v2i64x_info>, EVEX_V128;
// Galois-field affine transforms (inverse: 0xCF, forward: 0xCE). VEX_W
// reflects the qword matrix operand; both use the AVX512 imm8 opcode map.
10680 defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
10681 X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
10682 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10683 defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
10684 X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
10685 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;