1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT). These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
23 RegisterClass RC = rc;
24 ValueType EltVT = eltvt;
25 int NumElts = numelts;
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
34 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
36 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
39 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
48 ValueType VT = !cast<ValueType>(VTName);
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
52 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
55 // "i" for integer types and "f" for floating-point types
56 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
58 // Size of RC in bits, e.g. 512 for VR512.
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64 // FP scalar memory operand for intrinsics - ssmem/sdmem.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
75 !if (!eq (Size, 512), "v8i64",
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
85 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
87 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
93 // The corresponding float type, e.g. v16f32 for v16i32
94 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
// Concrete X86VectorVTInfo instances.
// 512-bit vector types (RC = VR512); the suffix string is the mnemonic
// element suffix ("b"/"w"/"d"/"q" for ints, "ps"/"pd" for FP).
139 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
141 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
143 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
146 // "x" in v32i8x_info means RC = VR256X
147 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
151 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector types (RC = VR128X).
154 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
158 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
161 // We map scalar types to the smallest (128-bit) vector type
162 // with the appropriate element type. This allows us to use the same masking logic.
// NumElts = 1 here; note the scalar ints live in GPRs (GR32/GR64) while the
// scalar FP types live in VR128X.
163 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
165 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
168 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
175 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
177 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
179 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
181 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
183 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
185 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
188 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
// Mask-vector infos, one per supported mask width (1..64 bits). Each pairs
// a mask register class (VKn) with its write-mask variant (VKnWM).
195 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
196 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
197 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
198 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
199 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
200 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
201 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
203 // This multiclass generates the masking variants from the non-masking
204 // variant. It only provides the assembly pieces for the masking variants.
205 // It assumes custom ISel patterns for masking which can be provided as
206 // template arguments.
207 multiclass AVX512_maskable_custom<bits<8> O, Format F,
209 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
211 string AttSrcAsm, string IntelSrcAsm,
213 list<dag> MaskingPattern,
214 list<dag> ZeroMaskingPattern,
216 string MaskingConstraint = "",
217 bit IsCommutable = 0,
218 bit IsKCommutable = 0> {
219 let isCommutable = IsCommutable in
220 def NAME: AVX512<O, F, Outs, Ins,
221 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
222 "$dst, "#IntelSrcAsm#"}",
225 // Prefer over VMOV*rrk Pat<>
226 let isCommutable = IsKCommutable in
227 def NAME#k: AVX512<O, F, Outs, MaskingIns,
228 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
229 "$dst {${mask}}, "#IntelSrcAsm#"}",
230 MaskingPattern, itin>,
232 // In case of the 3src subclass this is overridden with a let.
233 string Constraints = MaskingConstraint;
236 // Zero mask does not add any restrictions to commute operands transformation.
237 // So, it is Ok to use IsCommutable instead of IsKCommutable.
238 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
239 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
240 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
241 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
248 // Common base class of AVX512_maskable and AVX512_maskable_3src.
249 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
251 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
253 string AttSrcAsm, string IntelSrcAsm,
254 dag RHS, dag MaskingRHS,
256 SDNode Select = vselect,
257 string MaskingConstraint = "",
258 bit IsCommutable = 0,
259 bit IsKCommutable = 0> :
260 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
261 AttSrcAsm, IntelSrcAsm,
262 [(set _.RC:$dst, RHS)],
263 [(set _.RC:$dst, MaskingRHS)],
265 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
266 itin, MaskingConstraint, IsCommutable,
269 // This multiclass generates the unconditional/non-masking, the masking and
270 // the zero-masking variant of the vector instruction. In the masking case, the
271 // preserved vector elements come from a new dummy input operand tied to $dst.
272 // This version uses a separate dag for non-masking and masking.
273 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
274 dag Outs, dag Ins, string OpcodeStr,
275 string AttSrcAsm, string IntelSrcAsm,
276 dag RHS, dag MaskRHS,
278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 SDNode Select = vselect> :
280 AVX512_maskable_custom<O, F, Outs, Ins,
281 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
282 !con((ins _.KRCWM:$mask), Ins),
283 OpcodeStr, AttSrcAsm, IntelSrcAsm,
284 [(set _.RC:$dst, RHS)],
286 (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
288 (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
289 itin, "$src0 = $dst", IsCommutable, IsKCommutable>;
291 // This multiclass generates the unconditional/non-masking, the masking and
292 // the zero-masking variant of the vector instruction. In the masking case, the
293 // preserved vector elements come from a new dummy input operand tied to $dst.
294 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
295 dag Outs, dag Ins, string OpcodeStr,
296 string AttSrcAsm, string IntelSrcAsm,
299 bit IsCommutable = 0, bit IsKCommutable = 0,
300 SDNode Select = vselect> :
301 AVX512_maskable_common<O, F, _, Outs, Ins,
302 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
303 !con((ins _.KRCWM:$mask), Ins),
304 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
305 (Select _.KRCWM:$mask, RHS, _.RC:$src0), itin,
306 Select, "$src0 = $dst", IsCommutable, IsKCommutable>;
308 // This multiclass generates the unconditional/non-masking, the masking and
309 // the zero-masking variant of the scalar instruction.
310 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
311 dag Outs, dag Ins, string OpcodeStr,
312 string AttSrcAsm, string IntelSrcAsm,
315 bit IsCommutable = 0> :
316 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
317 RHS, itin, IsCommutable, 0, X86selects>;
319 // Similar to AVX512_maskable but in this case one of the source operands
320 // ($src1) is already tied to $dst so we just use that for the preserved
321 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
323 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
324 dag Outs, dag NonTiedIns, string OpcodeStr,
325 string AttSrcAsm, string IntelSrcAsm,
326 dag RHS, InstrItinClass itin,
327 bit IsCommutable = 0,
328 bit IsKCommutable = 0,
329 SDNode Select = vselect,
331 AVX512_maskable_common<O, F, _, Outs,
332 !con((ins _.RC:$src1), NonTiedIns),
333 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
334 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
335 OpcodeStr, AttSrcAsm, IntelSrcAsm,
336 !if(MaskOnly, (null_frag), RHS),
337 (Select _.KRCWM:$mask, RHS, _.RC:$src1), itin,
338 Select, "", IsCommutable, IsKCommutable>;
340 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
341 dag Outs, dag NonTiedIns, string OpcodeStr,
342 string AttSrcAsm, string IntelSrcAsm,
343 dag RHS, InstrItinClass itin,
344 bit IsCommutable = 0,
345 bit IsKCommutable = 0,
347 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
348 IntelSrcAsm, RHS, itin, IsCommutable, IsKCommutable,
349 X86selects, MaskOnly>;
351 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
354 string AttSrcAsm, string IntelSrcAsm,
356 InstrItinClass itin> :
357 AVX512_maskable_custom<O, F, Outs, Ins,
358 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
359 !con((ins _.KRCWM:$mask), Ins),
360 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
361 itin, "$src0 = $dst">;
364 // Instruction with mask that puts result in mask register,
365 // like "compare" and "vptest"
366 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
368 dag Ins, dag MaskingIns,
370 string AttSrcAsm, string IntelSrcAsm,
372 list<dag> MaskingPattern,
374 bit IsCommutable = 0> {
375 let isCommutable = IsCommutable in
376 def NAME: AVX512<O, F, Outs, Ins,
377 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
378 "$dst, "#IntelSrcAsm#"}",
381 def NAME#k: AVX512<O, F, Outs, MaskingIns,
382 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
383 "$dst {${mask}}, "#IntelSrcAsm#"}",
384 MaskingPattern, itin>, EVEX_K;
387 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
389 dag Ins, dag MaskingIns,
391 string AttSrcAsm, string IntelSrcAsm,
392 dag RHS, dag MaskingRHS,
394 bit IsCommutable = 0> :
395 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
396 AttSrcAsm, IntelSrcAsm,
397 [(set _.KRC:$dst, RHS)],
398 [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
400 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
401 dag Outs, dag Ins, string OpcodeStr,
402 string AttSrcAsm, string IntelSrcAsm,
403 dag RHS, InstrItinClass itin,
404 bit IsCommutable = 0> :
405 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
406 !con((ins _.KRCWM:$mask), Ins),
407 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
408 (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
410 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
411 dag Outs, dag Ins, string OpcodeStr,
412 string AttSrcAsm, string IntelSrcAsm,
413 InstrItinClass itin> :
414 AVX512_maskable_custom_cmp<O, F, Outs,
415 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
416 AttSrcAsm, IntelSrcAsm, [],[], itin>;
418 // This multiclass generates the unconditional/non-masking, the masking and
419 // the zero-masking variant of the vector instruction. In the masking case, the
420 // preserved vector elements come from a new dummy input operand tied to $dst.
421 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
422 dag Outs, dag Ins, string OpcodeStr,
423 string AttSrcAsm, string IntelSrcAsm,
424 dag RHS, dag MaskedRHS,
426 bit IsCommutable = 0, SDNode Select = vselect> :
427 AVX512_maskable_custom<O, F, Outs, Ins,
428 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
429 !con((ins _.KRCWM:$mask), Ins),
430 OpcodeStr, AttSrcAsm, IntelSrcAsm,
431 [(set _.RC:$dst, RHS)],
433 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
435 (Select _.KRCWM:$mask, MaskedRHS,
437 itin, "$src0 = $dst", IsCommutable>;
440 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
441 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
442 // swizzled by ExecutionDepsFix to pxor.
443 // We set canFoldAsLoad because this can be converted to a constant-pool
444 // load of an all-zeros value if folding it would be beneficial.
445 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
446 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
447 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
448 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
449 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
450 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
453 // Alias instructions that allow VPTERNLOG to be used with a mask to create
454 // a mix of all ones and all zeros elements. This is done this way to force
455 // the same register to be used as input for all three sources.
456 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
457 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
458 (ins VK16WM:$mask), "",
459 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
460 (v16i32 immAllOnesV),
461 (v16i32 immAllZerosV)))]>;
462 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
463 (ins VK8WM:$mask), "",
464 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
465 (bc_v8i64 (v16i32 immAllOnesV)),
466 (bc_v8i64 (v16i32 immAllZerosV))))]>;
469 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
470 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
471 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
472 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
473 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
474 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
477 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
478 // This is expanded by ExpandPostRAPseudos.
479 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
480 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
481 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
482 [(set FR32X:$dst, fp32imm0)]>;
483 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
484 [(set FR64X:$dst, fpimm0)]>;
487 //===----------------------------------------------------------------------===//
488 // AVX-512 - VECTOR INSERT
491 // Supports two different pattern operators for mask and unmasked ops. Allows
492 // null_frag to be passed for one.
493 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
495 SDPatternOperator vinsert_insert,
496 SDPatternOperator vinsert_for_mask,
498 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
499 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
500 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
501 "vinsert" # From.EltTypeName # "x" # From.NumElts,
502 "$src3, $src2, $src1", "$src1, $src2, $src3",
503 (vinsert_insert:$src3 (To.VT To.RC:$src1),
504 (From.VT From.RC:$src2),
506 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
507 (From.VT From.RC:$src2),
508 (iPTR imm)), itins.rr>,
509 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
511 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
512 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
513 "vinsert" # From.EltTypeName # "x" # From.NumElts,
514 "$src3, $src2, $src1", "$src1, $src2, $src3",
515 (vinsert_insert:$src3 (To.VT To.RC:$src1),
516 (From.VT (bitconvert (From.LdFrag addr:$src2))),
518 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
519 (From.VT (bitconvert (From.LdFrag addr:$src2))),
520 (iPTR imm)), itins.rm>, AVX512AIi8Base, EVEX_4V,
521 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
522 Sched<[itins.Sched.Folded, ReadAfterLd]>;
526 // Passes the same pattern operator for masked and unmasked ops.
527 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
529 SDPatternOperator vinsert_insert,
531 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>;
533 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
534 X86VectorVTInfo To, PatFrag vinsert_insert,
535 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
536 let Predicates = p in {
537 def : Pat<(vinsert_insert:$ins
538 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
539 (To.VT (!cast<Instruction>(InstrStr#"rr")
540 To.RC:$src1, From.RC:$src2,
541 (INSERT_get_vinsert_imm To.RC:$ins)))>;
543 def : Pat<(vinsert_insert:$ins
545 (From.VT (bitconvert (From.LdFrag addr:$src2))),
547 (To.VT (!cast<Instruction>(InstrStr#"rm")
548 To.RC:$src1, addr:$src2,
549 (INSERT_get_vinsert_imm To.RC:$ins)))>;
553 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
554 ValueType EltVT64, int Opcode256,
557 let Predicates = [HasVLX] in
558 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
559 X86VectorVTInfo< 4, EltVT32, VR128X>,
560 X86VectorVTInfo< 8, EltVT32, VR256X>,
561 vinsert128_insert, itins>, EVEX_V256;
563 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
564 X86VectorVTInfo< 4, EltVT32, VR128X>,
565 X86VectorVTInfo<16, EltVT32, VR512>,
566 vinsert128_insert, itins>, EVEX_V512;
568 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
569 X86VectorVTInfo< 4, EltVT64, VR256X>,
570 X86VectorVTInfo< 8, EltVT64, VR512>,
571 vinsert256_insert, itins>, VEX_W, EVEX_V512;
573 // Even with DQI we'd like to only use these instructions for masking.
574 let Predicates = [HasVLX, HasDQI] in
575 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
576 X86VectorVTInfo< 2, EltVT64, VR128X>,
577 X86VectorVTInfo< 4, EltVT64, VR256X>,
578 null_frag, vinsert128_insert, itins>,
581 // Even with DQI we'd like to only use these instructions for masking.
582 let Predicates = [HasDQI] in {
583 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
584 X86VectorVTInfo< 2, EltVT64, VR128X>,
585 X86VectorVTInfo< 8, EltVT64, VR512>,
586 null_frag, vinsert128_insert, itins>,
589 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
590 X86VectorVTInfo< 8, EltVT32, VR256X>,
591 X86VectorVTInfo<16, EltVT32, VR512>,
592 null_frag, vinsert256_insert, itins>,
597 // FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI?
598 let Sched = WriteFShuffle256 in
599 def AVX512_VINSERTF : OpndItins<
600 IIC_SSE_SHUFP, IIC_SSE_SHUFP
602 let Sched = WriteShuffle256 in
603 def AVX512_VINSERTI : OpndItins<
604 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
// Instantiate the vector-insert instruction families. The FP family uses
// opcode 0x18 for the 128-bit-granule forms and 0x1a for the 256-bit-granule
// forms; the integer family uses 0x38 / 0x3a (matching the Opcode128 /
// Opcode256 template parameters of vinsert_for_type).
607 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>;
608 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>;
610 // Codegen patterns with the alternative types.
611 // Even with AVX512DQ we'll still use these for unmasked operations.
// Each entry maps an insert of From.VT into To.VT onto the named
// instruction's "rr"/"rm" forms (see vinsert_for_size_lowering), gated on
// the listed predicates.
612 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
613 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
614 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
615 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
617 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
618 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
619 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
620 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
622 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
623 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
624 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
625 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
627 // Codegen pattern with the alternative types insert VEC128 into VEC256
628 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
629 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
630 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
631 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
632 // Codegen pattern with the alternative types insert VEC128 into VEC512
633 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
634 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
635 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
636 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
637 // Codegen pattern with the alternative types insert VEC256 into VEC512
638 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
639 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
640 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
641 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
644 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
645 X86VectorVTInfo To, X86VectorVTInfo Cast,
646 PatFrag vinsert_insert,
647 SDNodeXForm INSERT_get_vinsert_imm,
649 let Predicates = p in {
651 (vselect Cast.KRCWM:$mask,
653 (vinsert_insert:$ins (To.VT To.RC:$src1),
654 (From.VT From.RC:$src2),
657 (!cast<Instruction>(InstrStr#"rrk")
658 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
659 (INSERT_get_vinsert_imm To.RC:$ins))>;
661 (vselect Cast.KRCWM:$mask,
663 (vinsert_insert:$ins (To.VT To.RC:$src1),
666 (From.LdFrag addr:$src2))),
669 (!cast<Instruction>(InstrStr#"rmk")
670 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
671 (INSERT_get_vinsert_imm To.RC:$ins))>;
674 (vselect Cast.KRCWM:$mask,
676 (vinsert_insert:$ins (To.VT To.RC:$src1),
677 (From.VT From.RC:$src2),
680 (!cast<Instruction>(InstrStr#"rrkz")
681 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
682 (INSERT_get_vinsert_imm To.RC:$ins))>;
684 (vselect Cast.KRCWM:$mask,
686 (vinsert_insert:$ins (To.VT To.RC:$src1),
689 (From.LdFrag addr:$src2))),
692 (!cast<Instruction>(InstrStr#"rmkz")
693 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
694 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Masked insert patterns where the mask's element type (the Cast info, third
// argument) differs from the type the insert is performed at. Each entry
// selects the named instruction's "rrk"/"rmk"/"rrkz"/"rmkz" forms for a
// vselect of Cast.KRCWM over the inserted value (see vinsert_for_mask_cast).
// 128-bit into 256-bit, FP:
698 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
699 v8f32x_info, vinsert128_insert,
700 INSERT_get_vinsert128_imm, [HasVLX]>;
701 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
702 v4f64x_info, vinsert128_insert,
703 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// 128-bit into 256-bit, integer:
705 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
706 v8i32x_info, vinsert128_insert,
707 INSERT_get_vinsert128_imm, [HasVLX]>;
708 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
709 v8i32x_info, vinsert128_insert,
710 INSERT_get_vinsert128_imm, [HasVLX]>;
711 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
712 v8i32x_info, vinsert128_insert,
713 INSERT_get_vinsert128_imm, [HasVLX]>;
714 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
715 v4i64x_info, vinsert128_insert,
716 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
717 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
718 v4i64x_info, vinsert128_insert,
719 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
720 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
721 v4i64x_info, vinsert128_insert,
722 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// 128-bit into 512-bit, FP:
724 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
725 v16f32_info, vinsert128_insert,
726 INSERT_get_vinsert128_imm, [HasAVX512]>;
727 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
728 v8f64_info, vinsert128_insert,
729 INSERT_get_vinsert128_imm, [HasDQI]>;
// 128-bit into 512-bit, integer:
731 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
732 v16i32_info, vinsert128_insert,
733 INSERT_get_vinsert128_imm, [HasAVX512]>;
734 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
735 v16i32_info, vinsert128_insert,
736 INSERT_get_vinsert128_imm, [HasAVX512]>;
737 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
738 v16i32_info, vinsert128_insert,
739 INSERT_get_vinsert128_imm, [HasAVX512]>;
740 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
741 v8i64_info, vinsert128_insert,
742 INSERT_get_vinsert128_imm, [HasDQI]>;
743 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
744 v8i64_info, vinsert128_insert,
745 INSERT_get_vinsert128_imm, [HasDQI]>;
746 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
747 v8i64_info, vinsert128_insert,
748 INSERT_get_vinsert128_imm, [HasDQI]>;
// 256-bit into 512-bit, FP:
750 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
751 v16f32_info, vinsert256_insert,
752 INSERT_get_vinsert256_imm, [HasDQI]>;
753 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
754 v8f64_info, vinsert256_insert,
755 INSERT_get_vinsert256_imm, [HasAVX512]>;
// 256-bit into 512-bit, integer:
757 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
758 v16i32_info, vinsert256_insert,
759 INSERT_get_vinsert256_imm, [HasDQI]>;
760 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
761 v16i32_info, vinsert256_insert,
762 INSERT_get_vinsert256_imm, [HasDQI]>;
763 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
764 v16i32_info, vinsert256_insert,
765 INSERT_get_vinsert256_imm, [HasDQI]>;
766 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
767 v8i64_info, vinsert256_insert,
768 INSERT_get_vinsert256_imm, [HasAVX512]>;
769 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
770 v8i64_info, vinsert256_insert,
771 INSERT_get_vinsert256_imm, [HasAVX512]>;
772 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
773 v8i64_info, vinsert256_insert,
774 INSERT_get_vinsert256_imm, [HasAVX512]>;
776 // vinsertps - insert f32 to XMM
777 let ExeDomain = SSEPackedSingle in {
778 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
779 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
780 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
781 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))],
782 IIC_SSE_INSERTPS_RR>, EVEX_4V, Sched<[WriteFShuffle]>;
783 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
784 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
785 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
786 [(set VR128X:$dst, (X86insertps VR128X:$src1,
787 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
788 imm:$src3))], IIC_SSE_INSERTPS_RM>, EVEX_4V,
789 EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd, ReadAfterLd]>;
792 //===----------------------------------------------------------------------===//
793 // AVX-512 VECTOR EXTRACT
796 // Supports two different pattern operators for mask and unmasked ops. Allows
797 // null_frag to be passed for one.
// Defines the register, store, and masked-store forms of a subvector
// extract from type From to type To. "Split" because the unmasked
// (vextract_extract) and masked (vextract_for_mask) forms may use
// different pattern operators; null_frag may be passed for one so that
// only the other form is pattern-matched.
798 multiclass vextract_for_size_split<int Opcode,
799 X86VectorVTInfo From, X86VectorVTInfo To,
800 SDPatternOperator vextract_extract,
801 SDPatternOperator vextract_for_mask,
// Register-to-register extract with full masking support (via
// AVX512_maskable_split).
804 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
805 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
806 (ins From.RC:$src1, u8imm:$idx),
807 "vextract" # To.EltTypeName # "x" # To.NumElts,
808 "$idx, $src1", "$src1, $idx",
809 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
810 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm)),
811 itins.rr>, AVX512AIi8Base, EVEX, Sched<[itins.Sched]>;
// Extract-to-memory form: stores the selected subvector to $dst.
813 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
814 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
815 "vextract" # To.EltTypeName # "x" # To.NumElts #
816 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
817 [(store (To.VT (vextract_extract:$idx
818 (From.VT From.RC:$src1), (iPTR imm))),
819 addr:$dst)], itins.rm>, EVEX,
820 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked extract-to-memory form; no ISel pattern (empty list), selected
// only by intrinsics/manual lowering elsewhere.
822 let mayStore = 1, hasSideEffects = 0 in
823 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
824 (ins To.MemOp:$dst, To.KRCWM:$mask,
825 From.RC:$src1, u8imm:$idx),
826 "vextract" # To.EltTypeName # "x" # To.NumElts #
827 "\t{$idx, $src1, $dst {${mask}}|"
828 "$dst {${mask}}, $src1, $idx}",
829 [], itins.rm>, EVEX_K, EVEX,
830 Sched<[itins.Sched.Folded, ReadAfterLd]>;
834 // Passes the same pattern operator for masked and unmasked ops.
// Convenience wrapper around vextract_for_size_split for the common case
// where the masked and unmasked forms share the same pattern operator.
835 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
837 SDPatternOperator vextract_extract,
839 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, itins>;
841 // Codegen pattern for the alternative types
// Codegen-only patterns that lower an extract of an "alternative" element
// type (e.g. i16/i8 vectors) onto an already-defined extract instruction
// named InstrStr, for both the register ("rr") and store ("mr") forms.
// EXTRACT_get_vextract_imm converts the extract index into the
// instruction's immediate.
842 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
843 X86VectorVTInfo To, PatFrag vextract_extract,
844 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
845 let Predicates = p in {
846 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
847 (To.VT (!cast<Instruction>(InstrStr#"rr")
849 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
850 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
851 (iPTR imm))), addr:$dst),
852 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
853 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates all extract variants for one FP/int element-type pair:
// 32x4 and 64x4 forms under AVX512/VLX, and the DQI-only 64x2 and 32x8
// forms, which pass null_frag for the unmasked operator so they are used
// for masking only.
857 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
858 ValueType EltVT64, int Opcode256,
860 let Predicates = [HasAVX512] in {
861 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
862 X86VectorVTInfo<16, EltVT32, VR512>,
863 X86VectorVTInfo< 4, EltVT32, VR128X>,
864 vextract128_extract, itins>,
865 EVEX_V512, EVEX_CD8<32, CD8VT4>;
866 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
867 X86VectorVTInfo< 8, EltVT64, VR512>,
868 X86VectorVTInfo< 4, EltVT64, VR256X>,
869 vextract256_extract, itins>,
870 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
872 let Predicates = [HasVLX] in
873 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
874 X86VectorVTInfo< 8, EltVT32, VR256X>,
875 X86VectorVTInfo< 4, EltVT32, VR128X>,
876 vextract128_extract, itins>,
877 EVEX_V256, EVEX_CD8<32, CD8VT4>;
879 // Even with DQI we'd like to only use these instructions for masking.
880 let Predicates = [HasVLX, HasDQI] in
881 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
882 X86VectorVTInfo< 4, EltVT64, VR256X>,
883 X86VectorVTInfo< 2, EltVT64, VR128X>,
884 null_frag, vextract128_extract, itins>,
885 VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
887 // Even with DQI we'd like to only use these instructions for masking.
888 let Predicates = [HasDQI] in {
889 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
890 X86VectorVTInfo< 8, EltVT64, VR512>,
891 X86VectorVTInfo< 2, EltVT64, VR128X>,
892 null_frag, vextract128_extract, itins>,
893 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
894 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
895 X86VectorVTInfo<16, EltVT32, VR512>,
896 X86VectorVTInfo< 8, EltVT32, VR256X>,
897 null_frag, vextract256_extract, itins>,
898 EVEX_V512, EVEX_CD8<32, CD8VT8>;
902 // FIXME: Is there a better scheduler itinerary for VEXTRACTF/VEXTRACTI?
// Itineraries for the FP and integer extract instructions, then the
// actual VEXTRACTF*/VEXTRACTI* instantiations for all size combinations.
903 let Sched = WriteFShuffle256 in
904 def AVX512_VEXTRACTF : OpndItins<
905 IIC_SSE_SHUFP, IIC_SSE_SHUFP
907 let Sched = WriteShuffle256 in
908 def AVX512_VEXTRACTI : OpndItins<
909 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
912 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, AVX512_VEXTRACTF>;
913 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, AVX512_VEXTRACTI>;
915 // extract_subvector codegen patterns with the alternative types.
916 // Even with AVX512DQ we'll still use these for unmasked operations.
// Map extracts of element types with no dedicated instruction (f64/i64
// via the 32x4 forms, and i16/i8 vectors) onto the existing VEXTRACT
// instructions for each From/To size combination.
917 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
918 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
919 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
920 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
922 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
923 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
924 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
925 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
927 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
928 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
930 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
932 // Codegen pattern with the alternative types extract VEC128 from VEC256
933 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
934 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
935 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
936 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
938 // Codegen pattern with the alternative types extract VEC128 from VEC512
939 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
940 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
941 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
942 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
943 // Codegen pattern with the alternative types extract VEC256 from VEC512
944 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
945 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
946 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
947 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
950 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
951 // smaller extract to enable EVEX->VEX.
// Without VLX, extract bits [255:128] of a 512-bit vector by first taking
// the ymm subregister and then using the VEX-encoded VEXTRACT*128,
// avoiding a 512-bit EVEX instruction.
952 let Predicates = [NoVLX] in {
953 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
954 (v2i64 (VEXTRACTI128rr
955 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
957 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
958 (v2f64 (VEXTRACTF128rr
959 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
961 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
962 (v4i32 (VEXTRACTI128rr
963 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
965 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
966 (v4f32 (VEXTRACTF128rr
967 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
969 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
970 (v8i16 (VEXTRACTI128rr
971 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
973 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
974 (v16i8 (VEXTRACTI128rr
975 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
979 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
980 // smaller extract to enable EVEX->VEX.
// With VLX, the same [255:128] extract uses the 256-bit VEXTRACT*32x4Z256
// forms, which can be converted from EVEX to VEX encoding.
981 let Predicates = [HasVLX] in {
982 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
983 (v2i64 (VEXTRACTI32x4Z256rr
984 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
986 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
987 (v2f64 (VEXTRACTF32x4Z256rr
988 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
990 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
991 (v4i32 (VEXTRACTI32x4Z256rr
992 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
994 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
995 (v4f32 (VEXTRACTF32x4Z256rr
996 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
998 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
999 (v8i16 (VEXTRACTI32x4Z256rr
1000 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1002 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1003 (v16i8 (VEXTRACTI32x4Z256rr
1004 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1009 // Additional patterns for handling a bitcast between the vselect and the
1010 // extract_subvector.
// Matches a masked extract (vselect over an extract_subvector) where the
// result is viewed through a bitcast as Cast.VT, and selects the masked
// ("rrk") or zero-masked ("rrkz") register form of instruction InstrStr.
1011 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1012 X86VectorVTInfo To, X86VectorVTInfo Cast,
1013 PatFrag vextract_extract,
1014 SDNodeXForm EXTRACT_get_vextract_imm,
1015 list<Predicate> p> {
1016 let Predicates = p in {
1017 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1019 (To.VT (vextract_extract:$ext
1020 (From.VT From.RC:$src), (iPTR imm)))),
1022 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1023 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1024 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Zero-masked variant: merge with an all-zeros vector instead of $src0.
1026 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
1028 (To.VT (vextract_extract:$ext
1029 (From.VT From.RC:$src), (iPTR imm)))),
1030 Cast.ImmAllZerosV)),
1031 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1032 Cast.KRCWM:$mask, From.RC:$src,
1033 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
// Instantiate the bitcast-masked-extract patterns for every supported
// From/To/Cast type combination, grouped by required feature predicates
// (VLX 256-bit sources, then 512-bit sources, then 256-bit results).
1037 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1038 v4f32x_info, vextract128_extract,
1039 EXTRACT_get_vextract128_imm, [HasVLX]>;
1040 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1041 v2f64x_info, vextract128_extract,
1042 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1044 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1045 v4i32x_info, vextract128_extract,
1046 EXTRACT_get_vextract128_imm, [HasVLX]>;
1047 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1048 v4i32x_info, vextract128_extract,
1049 EXTRACT_get_vextract128_imm, [HasVLX]>;
1050 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1051 v4i32x_info, vextract128_extract,
1052 EXTRACT_get_vextract128_imm, [HasVLX]>;
1053 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1054 v2i64x_info, vextract128_extract,
1055 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1056 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1057 v2i64x_info, vextract128_extract,
1058 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1059 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1060 v2i64x_info, vextract128_extract,
1061 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1063 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1064 v4f32x_info, vextract128_extract,
1065 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1067 v2f64x_info, vextract128_extract,
1068 EXTRACT_get_vextract128_imm, [HasDQI]>;
1070 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1071 v4i32x_info, vextract128_extract,
1072 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1073 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1074 v4i32x_info, vextract128_extract,
1075 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1077 v4i32x_info, vextract128_extract,
1078 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1079 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1080 v2i64x_info, vextract128_extract,
1081 EXTRACT_get_vextract128_imm, [HasDQI]>;
1082 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1083 v2i64x_info, vextract128_extract,
1084 EXTRACT_get_vextract128_imm, [HasDQI]>;
1085 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1086 v2i64x_info, vextract128_extract,
1087 EXTRACT_get_vextract128_imm, [HasDQI]>;
1089 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1090 v8f32x_info, vextract256_extract,
1091 EXTRACT_get_vextract256_imm, [HasDQI]>;
1092 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1093 v4f64x_info, vextract256_extract,
1094 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1096 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1097 v8i32x_info, vextract256_extract,
1098 EXTRACT_get_vextract256_imm, [HasDQI]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1100 v8i32x_info, vextract256_extract,
1101 EXTRACT_get_vextract256_imm, [HasDQI]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1103 v8i32x_info, vextract256_extract,
1104 EXTRACT_get_vextract256_imm, [HasDQI]>;
1105 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1106 v4i64x_info, vextract256_extract,
1107 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1108 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1109 v4i64x_info, vextract256_extract,
1110 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1111 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1112 v4i64x_info, vextract256_extract,
1113 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1115 // vextractps - extract 32 bits from XMM
// EVEX-encoded VEXTRACTPS: extracts the 32-bit element selected by the
// immediate, to a GPR (rr) or to memory (mr).
1116 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1117 (ins VR128X:$src1, u8imm:$src2),
1118 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1119 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))],
1120 IIC_SSE_EXTRACTPS_RR>, EVEX, VEX_WIG, Sched<[WriteFShuffle]>;
1122 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1123 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1124 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1125 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1126 addr:$dst)], IIC_SSE_EXTRACTPS_RM>,
1127 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteFShuffleLd]>;
1129 //===---------------------------------------------------------------------===//
1130 // AVX-512 BROADCAST
1132 // broadcast with a scalar argument.
// Patterns selecting the register-form broadcast instructions (r/rk/rkz)
// when the broadcast source is a scalar FP register (FRC); the scalar is
// first copied into the vector register class.
1133 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1134 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1135 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1136 (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
1137 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
// Merge-masked broadcast: preserve $src0 in lanes where the mask is 0.
1138 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1139 (X86VBroadcast SrcInfo.FRC:$src),
1140 DestInfo.RC:$src0)),
1141 (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
1142 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1143 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
// Zero-masked broadcast.
1144 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
1145 (X86VBroadcast SrcInfo.FRC:$src),
1146 DestInfo.ImmAllZerosV)),
1147 (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
1148 DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
1151 // Split version to allow mask and broadcast node to be different types. This
1152 // helps support the 32x2 broadcasts.
// Register and memory broadcast forms. MaskInfo (the type used for
// masking) may differ from DestInfo (the broadcast result type) — this
// supports the 32x2 broadcasts. UnmaskedOp defaults to X86VBroadcast but
// can be null_frag to disable the unmasked patterns.
1153 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1154 SchedWrite SchedRR, SchedWrite SchedRM,
1155 X86VectorVTInfo MaskInfo,
1156 X86VectorVTInfo DestInfo,
1157 X86VectorVTInfo SrcInfo,
1158 SDPatternOperator UnmaskedOp = X86VBroadcast> {
1159 let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in {
1160 defm r : AVX512_maskable_split<opc, MRMSrcReg, MaskInfo,
1161 (outs MaskInfo.RC:$dst),
1162 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
1166 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))),
1170 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1171 NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
// Memory form: broadcasts a scalar loaded via SrcInfo.ScalarLdFrag.
1173 defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
1174 (outs MaskInfo.RC:$dst),
1175 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
1178 (DestInfo.VT (UnmaskedOp
1179 (SrcInfo.ScalarLdFrag addr:$src))))),
1182 (DestInfo.VT (X86VBroadcast
1183 (SrcInfo.ScalarLdFrag addr:$src))))),
1184 NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
// Extra patterns folding a scalar_to_vector of a scalar load into the
// memory form (unmasked, merge-masked, and zero-masked variants).
1188 def : Pat<(MaskInfo.VT
1190 (DestInfo.VT (UnmaskedOp
1191 (SrcInfo.VT (scalar_to_vector
1192 (SrcInfo.ScalarLdFrag addr:$src))))))),
1193 (!cast<Instruction>(NAME#MaskInfo.ZSuffix#m) addr:$src)>;
1194 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1198 (SrcInfo.VT (scalar_to_vector
1199 (SrcInfo.ScalarLdFrag addr:$src)))))),
1200 MaskInfo.RC:$src0)),
1201 (!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
1202 MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask, addr:$src)>;
1203 def : Pat<(MaskInfo.VT (vselect MaskInfo.KRCWM:$mask,
1207 (SrcInfo.VT (scalar_to_vector
1208 (SrcInfo.ScalarLdFrag addr:$src)))))),
1209 MaskInfo.ImmAllZerosV)),
1210 (!cast<Instruction>(NAME#MaskInfo.ZSuffix#mkz)
1211 MaskInfo.KRCWM:$mask, addr:$src)>;
1214 // Helper class to force mask and broadcast result to same type.
// Wrapper over avx512_broadcast_rm_split that forces the mask type and
// the broadcast result type to be the same (the common case).
1215 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
1216 SchedWrite SchedRR, SchedWrite SchedRM,
1217 X86VectorVTInfo DestInfo,
1218 X86VectorVTInfo SrcInfo> :
1219 avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
1220 DestInfo, DestInfo, SrcInfo>;
// FP broadcast instantiations: vbroadcastsd has 512/256-bit forms only
// (no 128-bit form), while vbroadcastss additionally has a 128-bit form.
1222 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1223 AVX512VLVectorVTInfo _> {
1224 let Predicates = [HasAVX512] in
1225 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1226 WriteFShuffle256Ld, _.info512, _.info128>,
1227 avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1230 let Predicates = [HasVLX] in {
1231 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1232 WriteFShuffle256Ld, _.info256, _.info128>,
1233 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1238 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1239 AVX512VLVectorVTInfo _> {
1240 let Predicates = [HasAVX512] in
1241 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1242 WriteFShuffle256Ld, _.info512, _.info128>,
1243 avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
1246 let Predicates = [HasVLX] in {
1247 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1248 WriteFShuffle256Ld, _.info256, _.info128>,
1249 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
1251 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
1252 WriteFShuffle256Ld, _.info128, _.info128>,
1253 avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
1257 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1259 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1260 avx512vl_f64_info>, VEX_W;
// Map the legacy 512-bit broadcast intrinsics onto the memory forms.
1262 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
1263 (VBROADCASTSSZm addr:$src)>;
1264 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
1265 (VBROADCASTSDZm addr:$src)>;
// Broadcast from a general-purpose register (vpbroadcastd/q from GR32/64).
1267 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1268 X86VectorVTInfo _, SDPatternOperator OpNode,
1269 RegisterClass SrcRC> {
1270 let ExeDomain = _.ExeDomain in
1271 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1273 "vpbroadcast"##_.Suffix, "$src", "$src",
1274 (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
// Byte/word variant: the instruction encodes a GR32 source, so the
// instruction itself carries no pattern; separate Pats below insert the
// GR8/GR16 value into a GR32 via Subreg before calling it.
1278 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1279 X86VectorVTInfo _, SDPatternOperator OpNode,
1280 RegisterClass SrcRC, SubRegIndex Subreg> {
1281 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1282 defm r : AVX512_maskable_custom<opc, MRMSrcReg,
1283 (outs _.RC:$dst), (ins GR32:$src),
1284 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1285 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1286 "vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
1287 NoItinerary, "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1289 def : Pat <(_.VT (OpNode SrcRC:$src)),
1290 (!cast<Instruction>(Name#r)
1291 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1293 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1294 (!cast<Instruction>(Name#rk) _.RC:$src0, _.KRCWM:$mask,
1295 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1297 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1298 (!cast<Instruction>(Name#rkz) _.KRCWM:$mask,
1299 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
// Vector-length multiclasses: 512-bit form under `prd`, 256/128-bit forms
// additionally require VLX. Then the VPBROADCASTB/W/D/Q-from-GPR defms.
1302 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1303 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1304 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1305 let Predicates = [prd] in
1306 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1307 OpNode, SrcRC, Subreg>, EVEX_V512;
1308 let Predicates = [prd, HasVLX] in {
1309 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1310 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1311 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1312 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1316 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1317 SDPatternOperator OpNode,
1318 RegisterClass SrcRC, Predicate prd> {
1319 let Predicates = [prd] in
1320 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1322 let Predicates = [prd, HasVLX] in {
1323 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1325 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1330 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1331 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1332 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1333 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1335 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1336 X86VBroadcast, GR32, HasAVX512>;
1337 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1338 X86VBroadcast, GR64, HasAVX512>, VEX_W;
// Zero-extend a mask register to a vector by broadcasting constant 1
// under a zeroing mask.
1340 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
1341 (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
1342 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
1343 (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
1345 // Provide aliases for broadcast from the same register class that
1346 // automatically does the extract.
// Broadcasting a whole wide vector lowers to broadcasting its low xmm:
// extract the sub_xmm subregister and use the register-form instruction.
1347 multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
1348 X86VectorVTInfo SrcInfo> {
1349 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1350 (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
1351 (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
// Integer element broadcast for all vector lengths, plus the wide-source
// lowering patterns above for the 512/256-bit forms.
1354 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1355 AVX512VLVectorVTInfo _, Predicate prd> {
1356 let Predicates = [prd] in {
1357 defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1358 WriteShuffle256Ld, _.info512, _.info128>,
1359 avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
1361 // Defined separately to avoid redefinition.
1362 defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
1364 let Predicates = [prd, HasVLX] in {
1365 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
1366 WriteShuffle256Ld, _.info256, _.info128>,
1367 avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
1369 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
1370 WriteShuffleLd, _.info128, _.info128>,
1375 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1376 avx512vl_i8_info, HasBWI>;
1377 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1378 avx512vl_i16_info, HasBWI>;
1379 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1380 avx512vl_i32_info, HasAVX512>;
1381 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1382 avx512vl_i64_info, HasAVX512>, VEX_W;
// Memory-only subvector broadcast (vbroadcasti/f NxM): loads a _Src-sized
// vector and replicates it across the _Dst vector via X86SubVBroadcast.
1384 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1385 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1386 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1387 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1388 (_Dst.VT (X86SubVBroadcast
1389 (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1390 NoItinerary>, AVX5128IBase, EVEX,
1391 Sched<[WriteShuffleLd]>;
// DQ variant: uses AVX512_maskable_split so the unmasked pattern can be
// suppressed, restricting the DQ instructions to masked use.
1397 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1398 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1399 let hasSideEffects = 0, mayLoad = 1 in
1400 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1401 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1403 (_Dst.VT (X86SubVBroadcast
1404 (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
1405 NoItinerary>, AVX5128IBase, EVEX,
1406 Sched<[WriteShuffleLd]>;
// Fold zero-extending i64 loads and truncated i16 loads into the memory
// forms of VPBROADCASTQ/VPBROADCASTW.
1409 let Predicates = [HasAVX512] in {
1410 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1411 def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1412 (VPBROADCASTQZm addr:$src)>;
1415 let Predicates = [HasVLX] in {
1416 // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
1417 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1418 (VPBROADCASTQZ128m addr:$src)>;
1419 def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1420 (VPBROADCASTQZ256m addr:$src)>;
1422 let Predicates = [HasVLX, HasBWI] in {
1423 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1424 // This means we'll encounter truncated i32 loads; match that here.
1425 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1426 (VPBROADCASTWZ128m addr:$src)>;
1427 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1428 (VPBROADCASTWZ256m addr:$src)>;
1429 def : Pat<(v8i16 (X86VBroadcast
1430 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1431 (VPBROADCASTWZ128m addr:$src)>;
1432 def : Pat<(v16i16 (X86VBroadcast
1433 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1434 (VPBROADCASTWZ256m addr:$src)>;
1437 //===----------------------------------------------------------------------===//
1438 // AVX-512 BROADCAST SUBVECTORS
// 512-bit subvector broadcast instructions (x4 forms), plus lowering
// patterns for element types without a dedicated instruction and
// register-source fallbacks built from VINSERT when the load pattern
// cannot be used (load has other users).
1441 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1442 v16i32_info, v4i32x_info>,
1443 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1444 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1445 v16f32_info, v4f32x_info>,
1446 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1447 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1448 v8i64_info, v4i64x_info>, VEX_W,
1449 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1450 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1451 v8f64_info, v4f64x_info>, VEX_W,
1452 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1454 let Predicates = [HasAVX512] in {
1455 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1456 (VBROADCASTF64X4rm addr:$src)>;
1457 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1458 (VBROADCASTI64X4rm addr:$src)>;
1459 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1460 (VBROADCASTI64X4rm addr:$src)>;
1461 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1462 (VBROADCASTI64X4rm addr:$src)>;
1464 // Provide fallback in case the load node that is used in the patterns above
1465 // is used by additional users, which prevents the pattern selection.
1466 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1467 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1468 (v4f64 VR256X:$src), 1)>;
1469 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1470 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1471 (v8f32 VR256X:$src), 1)>;
1472 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1473 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1474 (v4i64 VR256X:$src), 1)>;
1475 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1476 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1477 (v8i32 VR256X:$src), 1)>;
1478 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1479 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1480 (v16i16 VR256X:$src), 1)>;
1481 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1482 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1483 (v32i8 VR256X:$src), 1)>;
1485 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1486 (VBROADCASTF32X4rm addr:$src)>;
1487 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1488 (VBROADCASTI32X4rm addr:$src)>;
1489 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1490 (VBROADCASTI32X4rm addr:$src)>;
1491 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1492 (VBROADCASTI32X4rm addr:$src)>;
// 256-bit (VLX) subvector broadcasts and their alternative-type lowering
// patterns, with VINSERT-based fallbacks for register sources.
1495 let Predicates = [HasVLX] in {
1496 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1497 v8i32x_info, v4i32x_info>,
1498 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1499 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1500 v8f32x_info, v4f32x_info>,
1501 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1503 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1504 (VBROADCASTF32X4Z256rm addr:$src)>;
1505 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1506 (VBROADCASTI32X4Z256rm addr:$src)>;
1507 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1508 (VBROADCASTI32X4Z256rm addr:$src)>;
1509 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1510 (VBROADCASTI32X4Z256rm addr:$src)>;
1512 // Provide fallback in case the load node that is used in the patterns above
1513 // is used by additional users, which prevents the pattern selection.
1514 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1515 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1516 (v2f64 VR128X:$src), 1)>;
1517 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1518 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1519 (v4f32 VR128X:$src), 1)>;
1520 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1521 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1522 (v2i64 VR128X:$src), 1)>;
1523 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1524 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1525 (v4i32 VR128X:$src), 1)>;
1526 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1527 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1528 (v8i16 VR128X:$src), 1)>;
1529 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1530 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1531 (v16i8 VR128X:$src), 1)>;
// DQI-only subvector broadcasts (64x2, 32x8); defined via the _dq variant
// so they are only selected for masked operations.
1534 let Predicates = [HasVLX, HasDQI] in {
1535 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1536 v4i64x_info, v2i64x_info>, VEX_W,
1537 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1538 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1539 v4f64x_info, v2f64x_info>, VEX_W,
1540 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1543 let Predicates = [HasDQI] in {
1544 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
1545 v8i64_info, v2i64x_info>, VEX_W,
1546 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1547 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
1548 v16i32_info, v8i32x_info>,
1549 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1550 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
1551 v8f64_info, v2f64x_info>, VEX_W,
1552 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1553 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
1554 v16f32_info, v8f32x_info>,
1555 EVEX_V512, EVEX_CD8<32, CD8VT8>;
// 32x2 broadcasts (DQI): the mask type and source type differ, so these
// use avx512_broadcast_rm_split with null_frag to disable the unmasked
// patterns. The i32x2 variant adds a 128-bit form on top of the common
// 512/256-bit forms.
1558 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1559 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1560 let Predicates = [HasDQI] in
1561 defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1562 WriteShuffle256Ld, _Dst.info512,
1563 _Src.info512, _Src.info128, null_frag>,
1565 let Predicates = [HasDQI, HasVLX] in
1566 defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
1567 WriteShuffle256Ld, _Dst.info256,
1568 _Src.info256, _Src.info128, null_frag>,
1572 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1573 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1574 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1576 let Predicates = [HasDQI, HasVLX] in
1577 defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
1578 WriteShuffleLd, _Dst.info128,
1579 _Src.info128, _Src.info128, null_frag>,
1583 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1584 avx512vl_i32_info, avx512vl_i64_info>;
1585 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1586 avx512vl_f32_info, avx512vl_f64_info>;
// Select X86VBroadcast of a full vector register into VBROADCASTSS/SD from
// its low 128-bit subregister (the broadcast only reads element 0).
1588 let Predicates = [HasVLX] in {
1589 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1590 (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1591 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1592 (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
// 512-bit destination forms of the same low-element broadcast.
1595 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1596 (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1597 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1598 (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1600 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1601 (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1602 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1603 (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1605 //===----------------------------------------------------------------------===//
1606 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Single register-to-register form: expand a mask register KRC into each
// element of the destination vector via the X86VBroadcastm node.
1608 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1609 X86VectorVTInfo _, RegisterClass KRC> {
1610 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1611 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1612 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
1613 IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
// Size wrapper: 512-bit needs CDI; 256/128-bit additionally need VLX.
1616 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1617 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1618 let Predicates = [HasCDI] in
1619 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1620 let Predicates = [HasCDI, HasVLX] in {
1621 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1622 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// VPBROADCASTMW2D: 16-bit mask -> dword elements; VPBROADCASTMB2Q: 8-bit
// mask -> qword elements.
1626 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1627 avx512vl_i32_info, VK16>;
1628 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1629 avx512vl_i64_info, VK8>, VEX_W;
1631 //===----------------------------------------------------------------------===//
1632 // -- VPERMI2 - 3 source operands form --
// Itinerary bundles for the two-source permutes: FP flavor uses the FP
// shuffle scheduling class, integer flavor the integer one.
1634 let Sched = WriteFShuffle256 in
1635 def AVX512_PERM2_F : OpndItins<
1636 IIC_SSE_SHUFP, IIC_SSE_SHUFP
1639 let Sched = WriteShuffle256 in
1640 def AVX512_PERM2_I : OpndItins<
1641 IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
// VPERMI2*: three-operand permute where $src1 doubles as the index vector
// and the destination ($src1 = $dst tied constraint).
1644 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins,
1645 X86VectorVTInfo _> {
1646 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1647 // The index operand in the pattern should really be an integer type. However,
1648 // if we do that and it happens to come from a bitcast, then it becomes
1649 // difficult to find the bitcast needed to convert the index to the
1650 // destination type for the passthru since it will be folded with the bitcast
1651 // of the index operand.
1652 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1653 (ins _.RC:$src2, _.RC:$src3),
1654 OpcodeStr, "$src3, $src2", "$src2, $src3",
1655 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)),
1656 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
// Memory form: full-vector load folded into operand 3.
1658 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1659 (ins _.RC:$src2, _.MemOp:$src3),
1660 OpcodeStr, "$src3, $src2", "$src2, $src3",
1661 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
1662 (_.VT (bitconvert (_.LdFrag addr:$src3))))), itins.rm, 1>,
1663 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-memory form (EVEX.b): scalar load splat as operand 3.
1667 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1668 X86VectorVTInfo _> {
1669 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1670 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1671 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1672 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1673 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1674 (_.VT (X86VPermi2X _.RC:$src1,
1675 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1676 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1677 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Size expansion with broadcast forms at every width; 128/256 need VLX.
1680 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1681 AVX512VLVectorVTInfo VTInfo> {
1682 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>,
1683 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1684 let Predicates = [HasVLX] in {
1685 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>,
1686 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1687 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>,
1688 avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
// Byte/word element sizes have no broadcast form and gate on Prd
// (BWI or VBMI below).
1692 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1694 AVX512VLVectorVTInfo VTInfo,
1696 let Predicates = [Prd] in
1697 defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1698 let Predicates = [Prd, HasVLX] in {
1699 defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1700 defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1704 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I,
1705 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1706 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I,
1707 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1708 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I,
1709 avx512vl_i16_info, HasBWI>,
1710 VEX_W, EVEX_CD8<16, CD8VF>;
1711 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I,
1712 avx512vl_i8_info, HasVBMI>,
1714 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F,
1715 avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
1716 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F,
1717 avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2*: three-operand permute where the tied $src1 is a data source and
// $src2 carries the indices (IdxVT may differ from the data type, e.g. the
// FP forms index with integer vectors).
1720 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins,
1721 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1723 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1724 (ins IdxVT.RC:$src2, _.RC:$src3),
1725 OpcodeStr, "$src3, $src2", "$src2, $src3",
1726 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)),
1727 itins.rr, 1>, EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>;
// Memory form: full-vector load folded into operand 3.
1729 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1730 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1731 OpcodeStr, "$src3, $src2", "$src2, $src3",
1732 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1733 (bitconvert (_.LdFrag addr:$src3)))), itins.rm, 1>,
1734 EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-memory form (EVEX.b).
1737 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins,
1738 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1739 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1740 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1741 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1742 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1743 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1744 (_.VT (X86VPermt2 _.RC:$src1,
1745 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1746 itins.rm, 1>, AVX5128IBase, EVEX_4V, EVEX_B,
1747 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Size expansion; ShuffleMask supplies the index-vector type per width.
1750 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins,
1751 AVX512VLVectorVTInfo VTInfo,
1752 AVX512VLVectorVTInfo ShuffleMask> {
1753 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1754 ShuffleMask.info512>,
1755 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512,
1756 ShuffleMask.info512>, EVEX_V512;
1757 let Predicates = [HasVLX] in {
1758 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1759 ShuffleMask.info128>,
1760 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128,
1761 ShuffleMask.info128>, EVEX_V128;
1762 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1763 ShuffleMask.info256>,
1764 avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256,
1765 ShuffleMask.info256>, EVEX_V256;
// Byte/word element sizes: no broadcast form, gated on Prd (BWI/VBMI).
1769 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins,
1770 AVX512VLVectorVTInfo VTInfo,
1771 AVX512VLVectorVTInfo Idx,
1773 let Predicates = [Prd] in
1774 defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512,
1775 Idx.info512>, EVEX_V512;
1776 let Predicates = [Prd, HasVLX] in {
1777 defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128,
1778 Idx.info128>, EVEX_V128;
1779 defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256,
1780 Idx.info256>, EVEX_V256;
1784 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I,
1785 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1786 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I,
1787 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1788 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I,
1789 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1790 VEX_W, EVEX_CD8<16, CD8VF>;
1791 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I,
1792 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
// FP data permuted with integer index vectors.
1794 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F,
1795 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1796 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F,
1797 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1799 //===----------------------------------------------------------------------===//
1800 // AVX-512 - BLEND using mask
// Itinerary bundles: FP blends use the variable-FP-blend sched class,
// integer blends the variable-integer-blend class.
1803 let Sched = WriteFVarBlend in
1804 def AVX512_BLENDM : OpndItins<
1805 IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
1808 let Sched = WriteVarBlend in
1809 def AVX512_PBLENDM : OpndItins<
1810 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
// Masked blend instructions. All pattern lists are empty ([]) — selection of
// VBLENDM* is handled elsewhere; these defs only provide encodings/asm.
// hasSideEffects = 0 marks them as having no unmodeled effects.
1813 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins,
1814 X86VectorVTInfo _> {
1815 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1816 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1817 (ins _.RC:$src1, _.RC:$src2),
1818 !strconcat(OpcodeStr,
1819 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1820 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
// Merge-masked (k) and zero-masked (kz) register forms.
1821 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1822 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1823 !strconcat(OpcodeStr,
1824 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1825 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
1826 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1827 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1828 !strconcat(OpcodeStr,
1829 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1830 [], itins.rr>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>;
// Memory forms, plain / merge-masked / zero-masked.
1831 let mayLoad = 1 in {
1832 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1833 (ins _.RC:$src1, _.MemOp:$src2),
1834 !strconcat(OpcodeStr,
1835 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1836 [], itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
1837 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1838 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1839 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1840 !strconcat(OpcodeStr,
1841 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1842 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
1843 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1844 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1845 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1846 !strconcat(OpcodeStr,
1847 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1848 [], itins.rm>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
1849 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-memory (EVEX.b) forms for dword/qword element sizes.
1853 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins,
1854 X86VectorVTInfo _> {
1855 let mayLoad = 1, hasSideEffects = 0 in {
1856 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1857 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1858 !strconcat(OpcodeStr,
1859 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1860 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1861 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1862 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1864 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1865 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1866 !strconcat(OpcodeStr,
1867 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1868 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1869 [], itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
1870 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Dword/qword blends: all widths plus broadcast forms; 128/256 need VLX.
1874 multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins,
1875 AVX512VLVectorVTInfo VTInfo> {
1876 defm Z : avx512_blendmask <opc, OpcodeStr, itins, VTInfo.info512>,
1877 avx512_blendmask_rmb <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1879 let Predicates = [HasVLX] in {
1880 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>,
1881 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1882 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>,
1883 avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
// Byte/word blends: no broadcast forms; gated on BWI (+VLX for 128/256).
1887 multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins,
1888 AVX512VLVectorVTInfo VTInfo> {
1889 let Predicates = [HasBWI] in
1890 defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512;
1892 let Predicates = [HasBWI, HasVLX] in {
1893 defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256;
1894 defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128;
1899 defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>;
1900 defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W;
1901 defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>;
1902 defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W;
1903 defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>;
1904 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W;
1907 //===----------------------------------------------------------------------===//
1908 // Compare Instructions
1909 //===----------------------------------------------------------------------===//
1911 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar FP compares producing a mask register. rr_Int/rm_Int operate on the
// vector (intrinsic) operand form; rrb_Int is the {sae} (suppress-all-
// exceptions) variant using OpNodeRnd with FROUND_NO_EXC.
1913 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd,
1915 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1917 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1918 "vcmp${cc}"#_.Suffix,
1919 "$src2, $src1", "$src1, $src2",
1920 (OpNode (_.VT _.RC:$src1),
1922 imm:$cc), itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1924 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1926 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
1927 "vcmp${cc}"#_.Suffix,
1928 "$src2, $src1", "$src1, $src2",
1929 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1930 imm:$cc), itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1931 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1933 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1935 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1936 "vcmp${cc}"#_.Suffix,
1937 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
1938 (OpNodeRnd (_.VT _.RC:$src1),
1941 (i32 FROUND_NO_EXC)), itins.rr>,
1942 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1943 // Accept explicit immediate argument form instead of comparison code.
1944 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1945 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1947 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1949 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, EVEX_4V,
1950 Sched<[itins.Sched]>;
1952 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
1954 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1956 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
1957 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1958 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1960 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1962 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1964 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", itins.rr>,
1965 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
1966 }// let isAsmParserOnly = 1, hasSideEffects = 0
// CodeGen-only forms on the scalar FP register class (FRC); rr is
// commutable since the mask result covers symmetric predicates.
1968 let isCodeGenOnly = 1 in {
1969 let isCommutable = 1 in
1970 def rr : AVX512Ii8<0xC2, MRMSrcReg,
1971 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
1972 !strconcat("vcmp${cc}", _.Suffix,
1973 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1974 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1977 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
1978 def rm : AVX512Ii8<0xC2, MRMSrcMem,
1980 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
1981 !strconcat("vcmp${cc}", _.Suffix,
1982 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1983 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1984 (_.ScalarLdFrag addr:$src2),
1986 itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>,
1987 Sched<[itins.Sched.Folded, ReadAfterLd]>;
1991 let Predicates = [HasAVX512] in {
1992 let ExeDomain = SSEPackedSingle in
1993 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd,
1994 SSE_ALU_F32S>, AVX512XSIi8Base;
1995 let ExeDomain = SSEPackedDouble in
1996 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd,
1997 SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W;
// Packed integer compares writing a mask register. The rrk/rmk forms AND the
// incoming write-mask with the compare result in the pattern, matching the
// hardware's masked-compare semantics.
2000 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
2001 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> {
2002 let isCommutable = IsCommutable in
2003 def rr : AVX512BI<opc, MRMSrcReg,
2004 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
2005 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2006 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
2007 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2008 def rm : AVX512BI<opc, MRMSrcMem,
2009 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
2010 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2011 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2012 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
2013 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2014 let isCommutable = IsCommutable in
2015 def rrk : AVX512BI<opc, MRMSrcReg,
2016 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
2017 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2018 "$dst {${mask}}, $src1, $src2}"),
2019 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2020 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
2021 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2022 def rmk : AVX512BI<opc, MRMSrcMem,
2023 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
2024 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
2025 "$dst {${mask}}, $src1, $src2}"),
2026 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2027 (OpNode (_.VT _.RC:$src1),
2029 (_.LdFrag addr:$src2))))))],
2030 itins.rm>, EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Adds the EVEX.b broadcast-from-memory forms (rmb/rmbk) on top of
// avx512_icmp_packed; only dword/qword element sizes instantiate this.
2033 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
2034 OpndItins itins, X86VectorVTInfo _, bit IsCommutable> :
2035 avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> {
2036 def rmb : AVX512BI<opc, MRMSrcMem,
2037 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
2038 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
2039 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2040 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2041 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
2042 itins.rm>, EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Masked broadcast form: write-mask ANDed with the compare result.
2043 def rmbk : AVX512BI<opc, MRMSrcMem,
2044 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2045 _.ScalarMemOp:$src2),
2046 !strconcat(OpcodeStr,
2047 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2048 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2049 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2050 (OpNode (_.VT _.RC:$src1),
2052 (_.ScalarLdFrag addr:$src2)))))],
2053 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2054 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Vector-length expansion: 512-bit under prd, 256/128-bit under prd + VLX.
2057 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
2058 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2059 Predicate prd, bit IsCommutable = 0> {
2060 let Predicates = [prd] in
2061 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2062 IsCommutable>, EVEX_V512;
2064 let Predicates = [prd, HasVLX] in {
2065 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2066 IsCommutable>, EVEX_V256;
2067 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2068 IsCommutable>, EVEX_V128;
// Same expansion for the broadcast-capable (rmb) variant.
2072 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
2073 SDNode OpNode, OpndItins itins,
2074 AVX512VLVectorVTInfo VTInfo,
2075 Predicate prd, bit IsCommutable = 0> {
2076 let Predicates = [prd] in
2077 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512,
2078 IsCommutable>, EVEX_V512;
2080 let Predicates = [prd, HasVLX] in {
2081 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256,
2082 IsCommutable>, EVEX_V256;
2083 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128,
2084 IsCommutable>, EVEX_V128;
2088 // FIXME: Is there a better scheduler itinerary for VPCMP?
// Equality compares are commutable; greater-than compares are not.
// Byte/word forms need BWI and have no broadcast variant; dword/qword forms
// use the rmb-capable multiclass.
2089 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
2090 SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
2091 EVEX_CD8<8, CD8VF>, VEX_WIG;
2093 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
2094 SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
2095 EVEX_CD8<16, CD8VF>, VEX_WIG;
2097 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
2098 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
2099 EVEX_CD8<32, CD8VF>;
2101 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
2102 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
2103 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2105 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
2106 SSE_ALU_F32P, avx512vl_i8_info, HasBWI>,
2107 EVEX_CD8<8, CD8VF>, VEX_WIG;
2109 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
2110 SSE_ALU_F32P, avx512vl_i16_info, HasBWI>,
2111 EVEX_CD8<16, CD8VF>, VEX_WIG;
2113 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
2114 SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>,
2115 EVEX_CD8<32, CD8VF>;
2117 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
2118 SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>,
2119 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
2121 // Transforms to swizzle an immediate to help matching memory operand in first
// SDNodeXForm used when commuting VPCMP operands (e.g. to fold a load that
// appears as the first operand): the asymmetric ordered predicates swap
// LT <-> NLE and LE <-> NLT. Only the low 3 bits of the code are used.
2123 def CommutePCMPCC : SDNodeXForm<imm, [{
2124 uint8_t Imm = N->getZExtValue() & 0x7;
2126 default: llvm_unreachable("Unreachable!");
2127 case 0x01: Imm = 0x06; break; // LT -> NLE
2128 case 0x02: Imm = 0x05; break; // LE -> NLT
2129 case 0x05: Imm = 0x02; break; // NLT -> LE
2130 case 0x06: Imm = 0x01; break; // NLE -> LT
2137 return getI8Imm(Imm, SDLoc(N));
// VPCMP with an explicit comparison-code operand ($cc). Masked forms AND the
// write-mask with the compare result; *_alt forms exist only for the asm
// parser and take a raw u8imm instead of a named condition.
2140 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
2141 OpndItins itins, X86VectorVTInfo _> {
2142 let isCommutable = 1 in
2143 def rri : AVX512AIi8<opc, MRMSrcReg,
2144 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
2145 !strconcat("vpcmp${cc}", Suffix,
2146 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2147 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2149 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2150 def rmi : AVX512AIi8<opc, MRMSrcMem,
2151 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
2152 !strconcat("vpcmp${cc}", Suffix,
2153 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2154 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2155 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2157 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2158 let isCommutable = 1 in
2159 def rrik : AVX512AIi8<opc, MRMSrcReg,
2160 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2162 !strconcat("vpcmp${cc}", Suffix,
2163 "\t{$src2, $src1, $dst {${mask}}|",
2164 "$dst {${mask}}, $src1, $src2}"),
2165 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2166 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2168 itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2169 def rmik : AVX512AIi8<opc, MRMSrcMem,
2170 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2172 !strconcat("vpcmp${cc}", Suffix,
2173 "\t{$src2, $src1, $dst {${mask}}|",
2174 "$dst {${mask}}, $src1, $src2}"),
2175 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2176 (OpNode (_.VT _.RC:$src1),
2177 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2179 itins.rm>, EVEX_4V, EVEX_K,
2180 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2182 // Accept explicit immediate argument form instead of comparison code.
2183 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2184 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2185 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2186 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2187 "$dst, $src1, $src2, $cc}"),
2188 [], itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
2190 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2191 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2192 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2193 "$dst, $src1, $src2, $cc}"),
2194 [], itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2195 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2196 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2198 !strconcat("vpcmp", Suffix,
2199 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2200 "$dst {${mask}}, $src1, $src2, $cc}"),
2201 [], itins.rr>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>;
2203 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2204 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2206 !strconcat("vpcmp", Suffix,
2207 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2208 "$dst {${mask}}, $src1, $src2, $cc}"),
2209 [], itins.rm>, EVEX_4V, EVEX_K,
2210 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Fold a load in the FIRST operand by commuting with CommutePCMPCC.
2213 def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)),
2214 (_.VT _.RC:$src1), imm:$cc),
2215 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2216 (CommutePCMPCC imm:$cc))>;
2218 def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)),
2219 (_.VT _.RC:$src1), imm:$cc)),
2220 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2221 _.RC:$src1, addr:$src2,
2222 (CommutePCMPCC imm:$cc))>;
// Adds broadcast-from-memory (EVEX.b) forms rmib/rmibk on top of
// avx512_icmp_cc, plus asm-parser-only immediate variants and commuted
// broadcast-load patterns.
2225 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
2226 OpndItins itins, X86VectorVTInfo _> :
2227 avx512_icmp_cc<opc, Suffix, OpNode, itins, _> {
2228 def rmib : AVX512AIi8<opc, MRMSrcMem,
2229 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2231 !strconcat("vpcmp${cc}", Suffix,
2232 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2233 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2234 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2235 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2237 itins.rm>, EVEX_4V, EVEX_B,
2238 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2239 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2240 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2241 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2242 !strconcat("vpcmp${cc}", Suffix,
2243 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2244 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2245 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2246 (OpNode (_.VT _.RC:$src1),
2247 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2249 itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2250 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2252 // Accept explicit immediate argument form instead of comparison code.
2253 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2254 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2255 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2257 !strconcat("vpcmp", Suffix,
2258 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2259 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2260 [], itins.rm>, EVEX_4V, EVEX_B,
2261 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2262 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2263 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2264 _.ScalarMemOp:$src2, u8imm:$cc),
2265 !strconcat("vpcmp", Suffix,
2266 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2267 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2268 [], itins.rm>, EVEX_4V, EVEX_K, EVEX_B,
2269 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Fold a broadcast load in the FIRST operand via CommutePCMPCC.
2272 def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2273 (_.VT _.RC:$src1), imm:$cc),
2274 (!cast<Instruction>(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2,
2275 (CommutePCMPCC imm:$cc))>;
2277 def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast
2278 (_.ScalarLdFrag addr:$src2)),
2279 (_.VT _.RC:$src1), imm:$cc)),
2280 (!cast<Instruction>(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask,
2281 _.RC:$src1, addr:$src2,
2282 (CommutePCMPCC imm:$cc))>;
// Vector-length expansion for the cc forms: 512-bit under prd,
// 256/128-bit under prd + VLX.
2285 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
2286 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2288 let Predicates = [prd] in
2289 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>,
2292 let Predicates = [prd, HasVLX] in {
2293 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>,
2295 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>,
// Same expansion for the broadcast-capable (rmb) variant.
2300 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
2301 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
2303 let Predicates = [prd] in
2304 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>,
2307 let Predicates = [prd, HasVLX] in {
2308 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>,
2310 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>,
2315 // FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU?
// Signed (X86cmpm) and unsigned (X86cmpmu) VPCMP with explicit condition
// code. Byte/word need BWI; dword/qword use the broadcast-capable forms.
2316 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P,
2317 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2318 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P,
2319 avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>;
2321 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P,
2322 avx512vl_i16_info, HasBWI>,
2323 VEX_W, EVEX_CD8<16, CD8VF>;
2324 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P,
2325 avx512vl_i16_info, HasBWI>,
2326 VEX_W, EVEX_CD8<16, CD8VF>;
2328 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P,
2329 avx512vl_i32_info, HasAVX512>,
2330 EVEX_CD8<32, CD8VF>;
2331 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P,
2332 avx512vl_i32_info, HasAVX512>,
2333 EVEX_CD8<32, CD8VF>;
2335 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P,
2336 avx512vl_i64_info, HasAVX512>,
2337 VEX_W, EVEX_CD8<64, CD8VF>;
2338 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P,
2339 avx512vl_i64_info, HasAVX512>,
2340 VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_vcmp_common: FP compare-into-mask (VCMPccPS/PD, opcode 0xC2) for a
// single vector width. Defines register (rri), memory (rmi) and
// embedded-broadcast (rmbi) forms, asm-parser-only variants taking an
// explicit u8 immediate instead of a condition-code mnemonic, and patterns
// that commute a load/broadcast in the first operand onto the memory forms.
// NOTE(review): some lines (outs clauses, pattern tails, closing brace) are
// elided in this excerpt.
2343 multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
2344 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2345 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2346 "vcmp${cc}"#_.Suffix,
2347 "$src2, $src1", "$src1, $src2",
2348 (X86cmpm (_.VT _.RC:$src1),
2350 imm:$cc), itins.rr, 1>,
2351 Sched<[itins.Sched]>;
2353 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2354 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2355 "vcmp${cc}"#_.Suffix,
2356 "$src2, $src1", "$src1, $src2",
2357 (X86cmpm (_.VT _.RC:$src1),
2358 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2359 imm:$cc), itins.rm>,
2360 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast form: second source is a scalar memory operand splatted via
// X86VBroadcast; EVEX.b set.
2362 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2364 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2365 "vcmp${cc}"#_.Suffix,
2366 "${src2}"##_.BroadcastStr##", $src1",
2367 "$src1, ${src2}"##_.BroadcastStr,
2368 (X86cmpm (_.VT _.RC:$src1),
2369 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2370 imm:$cc), itins.rm>,
2371 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2372 // Accept explicit immediate argument form instead of comparison code.
2373 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2374 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2376 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2378 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
2379 Sched<[itins.Sched]>;
2381 let mayLoad = 1 in {
2382 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2384 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2386 "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
2387 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2389 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2391 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2393 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2394 "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
2395 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2399 // Patterns for selecting with loads in other operand.
// These rely on CommutableCMPCC: the condition code must be symmetric for
// the operands to be swapped onto the rmi/rmbi (and masked k) forms.
2400 def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2401 CommutableCMPCC:$cc),
2402 (!cast<Instruction>(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2405 def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2),
2407 CommutableCMPCC:$cc)),
2408 (!cast<Instruction>(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask,
2409 _.RC:$src1, addr:$src2,
2412 def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2413 (_.VT _.RC:$src1), CommutableCMPCC:$cc),
2414 (!cast<Instruction>(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2417 def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast
2418 (_.ScalarLdFrag addr:$src2)),
2420 CommutableCMPCC:$cc)),
2421 (!cast<Instruction>(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2422 _.RC:$src1, addr:$src2,
// avx512_vcmp_sae: register-only FP compare with suppress-all-exceptions
// ({sae}) semantics, selected via the X86cmpmRnd node with FROUND_NO_EXC.
// EVEX.b distinguishes it from the plain register form. Also provides the
// asm-parser-only explicit-immediate variant.
2426 multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
2427 // comparison code form (VCMP[EQ/LT/LE/...]
2428 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2429 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2430 "vcmp${cc}"#_.Suffix,
2431 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2432 (X86cmpmRnd (_.VT _.RC:$src1),
2435 (i32 FROUND_NO_EXC)), itins.rr>,
2436 EVEX_B, Sched<[itins.Sched]>;
2438 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2439 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2441 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2443 "$cc, {sae}, $src2, $src1",
2444 "$src1, $src2, {sae}, $cc", itins.rr>,
2445 EVEX_B, Sched<[itins.Sched]>;
// avx512_vcmp: top-level VCMPPS/VCMPPD instantiation. The 512-bit form also
// gets the {sae} variant; 128/256-bit forms require VLX and have no {sae}.
2449 multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
2450 let Predicates = [HasAVX512] in {
2451 defm Z : avx512_vcmp_common<itins, _.info512>,
2452 avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
2455 let Predicates = [HasAVX512,HasVLX] in {
2456 defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
2457 defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
2461 defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
2462 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2463 defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
2464 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2467 // Patterns to select fp compares with load as first operand.
// Scalar (v1i1-result) compares where the load appears first: commute onto
// the memory form of VCMPSD/VCMPSS. Only valid for commutable condition
// codes (CommutableCMPCC).
2468 let Predicates = [HasAVX512] in {
2469 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2470 CommutableCMPCC:$cc)),
2471 (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>;
2473 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2474 CommutableCMPCC:$cc)),
2475 (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>;
2478 // ----------------------------------------------------------------
2480 //handle fpclass instruction mask = op(reg_scalar,imm)
2481 // op(mem_scalar,imm)
// avx512_scalar_fpclass: VFPCLASSSS/SD — classify a scalar FP value against
// the category bits in the i32u8 immediate, producing a 1-bit mask. Provides
// register and scalar-memory forms, each with a write-masked (EVEX_K)
// variant.
// NOTE(review): the masked forms combine the classification result with the
// writemask using 'or'; masking a predicate result is conventionally an
// 'and' — verify against the ISA semantics / later revisions of this file.
2482 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2483 OpndItins itins, X86VectorVTInfo _,
2485 let Predicates = [prd], ExeDomain = _.ExeDomain in {
2486 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2487 (ins _.RC:$src1, i32u8imm:$src2),
2488 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2489 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2490 (i32 imm:$src2)))], itins.rr>,
2491 Sched<[itins.Sched]>;
2492 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2493 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2494 OpcodeStr##_.Suffix#
2495 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2496 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2497 (OpNode (_.VT _.RC:$src1),
2498 (i32 imm:$src2))))], itins.rr>,
2499 EVEX_K, Sched<[itins.Sched]>;
2500 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2501 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2502 OpcodeStr##_.Suffix##
2503 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2505 (OpNode _.ScalarIntMemCPat:$src1,
2506 (i32 imm:$src2)))], itins.rm>,
2507 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2508 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2509 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2510 OpcodeStr##_.Suffix##
2511 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2512 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2513 (OpNode _.ScalarIntMemCPat:$src1,
2514 (i32 imm:$src2))))], itins.rm>,
2515 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2519 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2520 // fpclass(reg_vec, mem_vec, imm)
2521 // fpclass(reg_vec, broadcast(eltVt), imm)
// avx512_vector_fpclass: vector VFPCLASSPS/PD for one vector width. Forms:
// rr/rrk (register), rm/rmk (full-vector memory), rmb/rmbk (embedded
// broadcast of a scalar element, EVEX.b). The 'mem'/'broadcast' string
// parameters are spliced into the mnemonic to disambiguate operand size for
// the assembler ("{x}"/"{y}"/"{z}", "{l}"/"{q}" — see the _all wrapper).
// NOTE(review): as in the scalar variant, the masked patterns use
// '(or mask, result)' where an 'and' would be expected for writemasking —
// verify.
2522 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2523 OpndItins itins, X86VectorVTInfo _,
2524 string mem, string broadcast>{
2525 let ExeDomain = _.ExeDomain in {
2526 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2527 (ins _.RC:$src1, i32u8imm:$src2),
2528 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2529 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2530 (i32 imm:$src2)))], itins.rr>,
2531 Sched<[itins.Sched]>;
2532 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2533 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2534 OpcodeStr##_.Suffix#
2535 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2536 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2537 (OpNode (_.VT _.RC:$src1),
2538 (i32 imm:$src2))))], itins.rr>,
2539 EVEX_K, Sched<[itins.Sched]>;
2540 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2541 (ins _.MemOp:$src1, i32u8imm:$src2),
2542 OpcodeStr##_.Suffix##mem#
2543 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2544 [(set _.KRC:$dst,(OpNode
2545 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2546 (i32 imm:$src2)))], itins.rm>,
2547 Sched<[itins.Sched.Folded, ReadAfterLd]>;
2548 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2549 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2550 OpcodeStr##_.Suffix##mem#
2551 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2552 [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
2553 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2554 (i32 imm:$src2))))], itins.rm>,
2555 EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2556 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2557 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2558 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2559 _.BroadcastStr##", $dst|$dst, ${src1}"
2560 ##_.BroadcastStr##", $src2}",
2561 [(set _.KRC:$dst,(OpNode
2562 (_.VT (X86VBroadcast
2563 (_.ScalarLdFrag addr:$src1))),
2564 (i32 imm:$src2)))], itins.rm>,
2565 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
2566 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2567 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2568 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2569 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2570 _.BroadcastStr##", $src2}",
2571 [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
2572 (_.VT (X86VBroadcast
2573 (_.ScalarLdFrag addr:$src1))),
2574 (i32 imm:$src2))))], itins.rm>,
2575 EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// avx512_vector_fpclass_all: instantiate vector fpclass at 512-bit (prd
// only) and at 256/128-bit with VLX, passing the memory-size mnemonic
// suffixes "{z}"/"{y}"/"{x}" through to avx512_vector_fpclass.
2579 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2580 bits<8> opc, SDNode OpNode,
2581 OpndItins itins, Predicate prd,
2583 let Predicates = [prd] in {
2584 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2585 _.info512, "{z}", broadcast>, EVEX_V512;
2587 let Predicates = [prd, HasVLX] in {
2588 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2589 _.info128, "{x}", broadcast>, EVEX_V128;
2590 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins,
2591 _.info256, "{y}", broadcast>, EVEX_V256;
2595 // FIXME: Is there a better scheduler itinerary for VFPCLASS?
// avx512_fp_fpclass_all: builds the whole VFPCLASS family — packed PS/PD
// (opcVec) and scalar SS/SD (opcScalar). "{l}"/"{q}" are the broadcast
// mnemonic suffixes for 32- and 64-bit elements.
2596 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2597 bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
2598 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2599 VecOpNode, SSE_ALU_F32P, prd, "{l}">,
2600 EVEX_CD8<32, CD8VF>;
2601 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2602 VecOpNode, SSE_ALU_F64P, prd, "{q}">,
2603 EVEX_CD8<64, CD8VF> , VEX_W;
2604 defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2605 SSE_ALU_F32S, f32x_info, prd>,
2606 EVEX_CD8<32, CD8VT1>;
2607 defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2608 SSE_ALU_F64S, f64x_info, prd>,
2609 EVEX_CD8<64, CD8VT1>, VEX_W;
// VFPCLASS requires DQI; vector opcode 0x66, scalar opcode 0x67.
2612 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2613 X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2615 //-----------------------------------------------------------------
2616 // Mask register copy, including
2617 // - copy between mask registers
2618 // - load/store mask registers
2619 // - copy from GPR to mask register and vice versa
// avx512_mask_mov: KMOV k<-k (kk), k<-mem (km), mem<-k (mk) forms for one
// mask width. Only the memory forms carry patterns; the k<-k form is a pure
// move with no side effects.
2621 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2622 string OpcodeStr, RegisterClass KRC,
2623 ValueType vvt, X86MemOperand x86memop> {
2624 let hasSideEffects = 0, SchedRW = [WriteMove] in
2625 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2626 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2628 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2629 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2630 [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
2631 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2632 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2633 [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
// avx512_mask_mov_gpr: KMOV between mask registers and GPRs (kr: k<-GPR,
// rk: GPR<-k). Patternless; selection happens via the COPY patterns below.
2636 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2638 RegisterClass KRC, RegisterClass GRC> {
2639 let hasSideEffects = 0 in {
2640 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2641 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2642 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
2643 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2644 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
2645 IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
// KMOVB needs DQI, KMOVW needs base AVX512F, KMOVD/KMOVQ need BWI. Note the
// GPR forms of KMOVB/KMOVW/KMOVD all use GR32; only KMOVQ uses GR64.
2649 let Predicates = [HasDQI] in
2650 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2651 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2654 let Predicates = [HasAVX512] in
2655 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2656 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2659 let Predicates = [HasBWI] in {
2660 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2662 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2664 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2666 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2670 // GR from/to mask register
// i16/i8 <-> mask bitcasts go through GR32 with sub-register
// insert/extract, since KMOVW/KMOVB only have 32-bit GPR forms.
2671 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2672 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2673 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2674 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2676 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2677 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2678 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2679 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// KMOVWrk zero-extends the 16-bit mask into a 32-bit GPR, so zext is free;
// anyext is a plain register-class copy.
2681 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2682 (KMOVWrk VK16:$src)>;
2683 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2684 (COPY_TO_REGCLASS VK16:$src, GR32)>;
// For 8-bit masks, prefer KMOVBrk when DQI is available; otherwise extract
// the low byte and MOVZX it.
2686 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2687 (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
2688 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2689 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2690 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2691 (COPY_TO_REGCLASS VK8:$src, GR32)>;
// 32/64-bit masks match GPR width exactly, so bitcasts are pure copies.
2693 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2694 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2695 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2696 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2697 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2698 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2699 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2700 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Spill/reload of sub-byte masks. With DQI, widen to VK8 and use KMOVB.
// Without DQI, stores go through a GPR byte extract and loads use
// MOVZX32rm8 so the upper mask bits are zeroed.
// NOTE(review): the store instruction lines of the NoDQI patterns are
// elided in this excerpt.
2703 let Predicates = [HasDQI] in {
2704 def : Pat<(store VK4:$src, addr:$dst),
2705 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
2706 def : Pat<(store VK2:$src, addr:$dst),
2707 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
2708 def : Pat<(store VK1:$src, addr:$dst),
2709 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2711 def : Pat<(v2i1 (load addr:$src)),
2712 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2713 def : Pat<(v4i1 (load addr:$src)),
2714 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2716 let Predicates = [HasAVX512, NoDQI] in {
2717 def : Pat<(store VK1:$src, addr:$dst),
2719 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
2721 def : Pat<(store VK2:$src, addr:$dst),
2723 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
2725 def : Pat<(store VK4:$src, addr:$dst),
2727 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
2729 def : Pat<(store VK8:$src, addr:$dst),
2731 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
2734 def : Pat<(v8i1 (load addr:$src)),
2735 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2736 def : Pat<(v2i1 (load addr:$src)),
2737 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
2738 def : Pat<(v4i1 (load addr:$src)),
2739 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
2742 let Predicates = [HasAVX512] in {
2743 def : Pat<(v1i1 (load addr:$src)),
2744 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK1)>;
2745 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2746 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2749 let Predicates = [HasAVX512] in {
// operation_gpr_mask_copy_lowering: GPR <-> mask conversions for every mask
// width — scalar_to_vector from GR32/GR8 and X86kextract of element 0 are
// all lowered to plain register-class copies.
2750 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2751 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2752 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2754 def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
2755 (COPY_TO_REGCLASS maskRC:$src, GR32)>;
2757 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2758 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2761 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2762 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2763 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2764 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2765 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2766 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2767 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
// (kshiftl;kshiftr) by 15 isolates bit 0 of a GR8-sourced mask: AND the GPR
// with 1, move it to a mask register with KMOVW, then copy to the target
// mask class.
2769 def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2771 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2772 GR8:$src, sub_8bit), (i32 1))), VK1)>;
2773 def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2775 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2776 GR8:$src, sub_8bit), (i32 1))), VK16)>;
2777 def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2779 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2780 GR8:$src, sub_8bit), (i32 1))), VK8)>;
2784 // Mask unary operation
// avx512_mask_unop(_all): register-only unary mask op (currently only KNOT)
// across B/W/D/Q widths; B needs DQI, D/Q need BWI.
2786 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2787 RegisterClass KRC, SDPatternOperator OpNode,
2788 OpndItins itins, Predicate prd> {
2789 let Predicates = [prd] in
2790 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2791 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2792 [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
2793 Sched<[itins.Sched]>;
2796 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2797 SDPatternOperator OpNode, OpndItins itins> {
2798 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2799 itins, HasDQI>, VEX, PD;
2800 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2801 itins, HasAVX512>, VEX, PS;
2802 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2803 itins, HasBWI>, VEX, PD, VEX_W;
2804 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2805 itins, HasBWI>, VEX, PS, VEX_W;
2808 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
2810 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
// Narrow masks (v8i1 without DQI, and v4i1/v2i1 always) are widened to VK16
// and inverted with KNOTW, then copied back to the narrow class.
2811 let Predicates = [HasAVX512, NoDQI] in
2812 def : Pat<(vnot VK8:$src),
2813 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
2815 def : Pat<(vnot VK4:$src),
2816 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2817 def : Pat<(vnot VK2:$src),
2818 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2820 // Mask binary operation
2821 // - KAND, KANDN, KOR, KXNOR, KXOR
// avx512_mask_binop(_all): three-operand mask logic ops across B/W/D/Q.
// prdW lets the W form require a stronger predicate (KADD uses HasDQI).
2822 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2823 RegisterClass KRC, SDPatternOperator OpNode,
2824 OpndItins itins, Predicate prd, bit IsCommutable> {
2825 let Predicates = [prd], isCommutable = IsCommutable in
2826 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2827 !strconcat(OpcodeStr,
2828 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2829 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2830 Sched<[itins.Sched]>;
2833 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2834 SDPatternOperator OpNode, OpndItins itins,
2835 bit IsCommutable, Predicate prdW = HasAVX512> {
2836 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2837 itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2838 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2839 itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2840 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2841 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2842 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2843 itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2846 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2847 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2848 // These nodes use 'vnot' instead of 'not' to support vectors.
2849 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2850 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
2852 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
2853 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
2854 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
2855 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
2856 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
2857 defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1, HasDQI>;
// avx512_binop_pat: lower mask logic ops on narrow mask types (VK8 without
// DQI, and VK1/VK2/VK4 always) by widening both operands to VK16, running
// the 16-bit instruction, and copying back.
2859 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2861 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2862 // for the DQI set, this type is legal and KxxxB instruction is used
2863 let Predicates = [NoDQI] in
2864 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2866 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2867 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2869 // All types smaller than 8 bits require conversion anyway
2870 def : Pat<(OpNode VK1:$src1, VK1:$src2),
2871 (COPY_TO_REGCLASS (Inst
2872 (COPY_TO_REGCLASS VK1:$src1, VK16),
2873 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// NOTE(review): the VK2 and VK4 patterns below copy the result back to VK1
// rather than VK2/VK4 respectively — this looks like a copy-paste slip;
// verify against the upstream file before relying on it.
2874 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2875 (COPY_TO_REGCLASS (Inst
2876 (COPY_TO_REGCLASS VK2:$src1, VK16),
2877 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
2878 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2879 (COPY_TO_REGCLASS (Inst
2880 (COPY_TO_REGCLASS VK4:$src1, VK16),
2881 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
2884 defm : avx512_binop_pat<and, and, KANDWrr>;
2885 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2886 defm : avx512_binop_pat<or, or, KORWrr>;
2887 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2888 defm : avx512_binop_pat<xor, xor, KXORWrr>;
// avx512_mask_unpck: KUNPCKBW/WD/DQ — concatenate two half-width masks into
// one. Note the pattern places $src2 first: concat_vectors puts its first
// operand in the low half, which is the instruction's second source.
2891 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
2892 RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
2893 let Predicates = [prd] in {
2894 let hasSideEffects = 0 in
2895 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
2896 (ins KRC:$src1, KRC:$src2),
2897 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
2898 itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
2900 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
2901 (!cast<Instruction>(NAME##rr)
2902 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
2903 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
2907 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
2908 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
2909 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
// avx512_mask_testop(_w): KORTEST/KTEST — mask tests that set EFLAGS and
// produce no register result. KORTESTW needs only AVX512F; KTEST requires
// DQI even for the W form (passed via prdW).
2912 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2913 SDNode OpNode, OpndItins itins, Predicate prd> {
2914 let Predicates = [prd], Defs = [EFLAGS] in
2915 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2916 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
2917 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
2918 Sched<[itins.Sched]>;
2921 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2922 OpndItins itins, Predicate prdW = HasAVX512> {
2923 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
2925 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
2927 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
2929 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
2933 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
2934 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
// avx512_mask_shiftop(_w): KSHIFTL/KSHIFTR by an 8-bit immediate. W and B
// forms share opcode opc1 (B additionally requires DQI); Q and D share opc2
// and require BWI.
2937 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2938 SDNode OpNode, OpndItins itins> {
2939 let Predicates = [HasAVX512] in
2940 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
2941 !strconcat(OpcodeStr,
2942 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
2943 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
2944 itins.rr>, Sched<[itins.Sched]>;
2947 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
2948 SDNode OpNode, OpndItins itins> {
2949 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2950 itins>, VEX, TAPD, VEX_W;
2951 let Predicates = [HasDQI] in
2952 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2954 let Predicates = [HasBWI] in {
2955 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2956 itins>, VEX, TAPD, VEX_W;
2957 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2962 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
2963 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
// Lowerings for 256-bit compares when VLX is unavailable: widen the v8i32
// operands into the low half of a zmm (INSERT_SUBREG into IMPLICIT_DEF),
// run the 512-bit compare, and take the result as VK8. The upper lanes are
// undef, but only the low 8 mask bits are consumed.
// NOTE(review): "axv512" in these multiclass names is a typo for "avx512";
// renaming would need the instantiating defms below updated in lockstep.
2965 multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
2966 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
2967 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
2968 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2969 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
2971 def : Pat<(v8i1 (and VK8:$mask,
2972 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
2974 (!cast<Instruction>(InstStr##Zrrk)
2975 (COPY_TO_REGCLASS VK8:$mask, VK16),
2976 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2977 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
// Same widening trick for the compare-with-immediate-predicate (cc) forms.
2981 multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
2982 AVX512VLVectorVTInfo _> {
2983 def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
2984 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
2985 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2986 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
2989 def : Pat<(v8i1 (and VK8:$mask, (OpNode (_.info256.VT VR256X:$src1),
2990 (_.info256.VT VR256X:$src2), imm:$cc))),
2991 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
2992 (COPY_TO_REGCLASS VK8:$mask, VK16),
2993 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
2994 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
2998 let Predicates = [HasAVX512, NoVLX] in {
2999 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
3000 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
3002 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
3003 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
3004 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
3007 // Mask setting all 0s or 1s
// avx512_mask_setop: pseudo-instructions KSET0/KSET1 materializing all-zero
// / all-one masks; rematerializable and as cheap as a move. Narrower masks
// (v8i1 and below) reuse the 16-bit pseudo via register-class copies.
3008 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3009 let Predicates = [HasAVX512] in
3010 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3011 SchedRW = [WriteZero] in
3012 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3013 [(set KRC:$dst, (VT Val))]>;
3016 multiclass avx512_mask_setop_w<PatFrag Val> {
3017 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3018 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3019 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3022 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3023 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3025 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
3026 let Predicates = [HasAVX512] in {
3027 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3028 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3029 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3030 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3031 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3032 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3033 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3034 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3037 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are pure register-class copies: extracting the low
// subvector of a mask, or inserting into an undef wider mask at index 0,
// changes only the nominal type, not the bits.
3038 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3039 RegisterClass RC, ValueType VT> {
3040 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3041 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3043 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3044 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiated for every (narrow, wide) mask-width pair.
3046 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3047 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3048 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3049 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3050 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3051 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3053 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3054 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3055 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3056 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3057 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3059 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3060 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3061 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3062 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3064 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3065 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3066 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3068 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3069 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3071 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3073 //===----------------------------------------------------------------------===//
3074 // AVX-512 - Aligned and unaligned load and store
// avx512_load: vector move/load for one width. Forms: rr (move), rm (load),
// plus write-masked merge (rrk/rmk) and zero-masked (rrkz/rmkz) variants.
// NoRMPattern suppresses the plain-load pattern; SelectOprr lets callers
// substitute a stricter select operator for the register forms. Masked-load
// (mload) patterns map onto the rmkz/rmk forms.
3078 multiclass avx512_load<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3079 X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3080 bit NoRMPattern = 0,
3081 SDPatternOperator SelectOprr = vselect> {
3082 let hasSideEffects = 0 in {
3083 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3084 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3085 _.ExeDomain, itins.rr>, EVEX, Sched<[WriteMove]>;
3086 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3087 (ins _.KRCWM:$mask, _.RC:$src),
3088 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3089 "${dst} {${mask}} {z}, $src}"),
3090 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3092 _.ImmAllZerosV)))], _.ExeDomain,
3093 itins.rr>, EVEX, EVEX_KZ, Sched<[WriteMove]>;
3095 let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3096 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3097 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3098 !if(NoRMPattern, [],
3100 (_.VT (bitconvert (ld_frag addr:$src))))]),
3101 _.ExeDomain, itins.rm>, EVEX, Sched<[WriteLoad]>;
// Merge-masked forms tie $src0 to $dst (pass-through lanes).
3103 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3104 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3105 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3106 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3107 "${dst} {${mask}}, $src1}"),
3108 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3110 (_.VT _.RC:$src0))))], _.ExeDomain,
3111 itins.rr>, EVEX, EVEX_K, Sched<[WriteMove]>;
3112 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3113 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3114 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3115 "${dst} {${mask}}, $src1}"),
3116 [(set _.RC:$dst, (_.VT
3117 (vselect _.KRCWM:$mask,
3118 (_.VT (bitconvert (ld_frag addr:$src1))),
3119 (_.VT _.RC:$src0))))], _.ExeDomain, itins.rm>,
3120 EVEX, EVEX_K, Sched<[WriteLoad]>;
3122 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3123 (ins _.KRCWM:$mask, _.MemOp:$src),
3124 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3125 "${dst} {${mask}} {z}, $src}",
3126 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3127 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3128 _.ExeDomain, itins.rm>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
// Masked loads: undef and zero pass-through select the zeroing form;
// a register pass-through selects the merging form.
3130 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3131 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3133 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3134 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3136 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3137 (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
3138 _.KRCWM:$mask, addr:$ptr)>;
// Aligned vector loads across all AVX-512 vector lengths: a 512-bit variant
// under predicate `prd`, plus 256-/128-bit variants additionally gated on
// HasVLX.  Each defm instantiates avx512_load with the aligned load fragment
// and the matching aligned masked-load PatFrag for that width.
// NOTE(review): this listing has gaps (original lines 3143, 3147-3148, 3152,
// 3155-3158 are absent), so the `Predicate prd` template parameter, the
// EVEX_V512/V256/V128 suffixes, and the closing braces are not visible here
// — confirm against the full file before editing.
3141 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3142 AVX512VLVectorVTInfo _,
3144 let Predicates = [prd] in
3145 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info512,
3146 _.info512.AlignedLdFrag, masked_load_aligned512>,
3149 let Predicates = [prd, HasVLX] in {
3150 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info256,
3151 _.info256.AlignedLdFrag, masked_load_aligned256>,
3153 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVA, _.info128,
3154 _.info128.AlignedLdFrag, masked_load_aligned128>,
// Unaligned vector loads across all AVX-512 vector lengths.  Unlike the
// aligned variant above, this forwards two extra knobs to avx512_load:
// NoRMPattern (suppress the plain reg-mem load pattern) and SelectOprr (the
// select operator used for the masked register-register forms; defaults to
// vselect).
// NOTE(review): original lines 3161 and 3168 are absent from this listing
// (presumably `Predicate prd,` and a blank/brace line) — verify before editing.
3159 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3160 AVX512VLVectorVTInfo _,
3162 bit NoRMPattern = 0,
3163 SDPatternOperator SelectOprr = vselect> {
3164 let Predicates = [prd] in
3165 defm Z : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info512, _.info512.LdFrag,
3166 masked_load_unaligned, NoRMPattern,
3167 SelectOprr>, EVEX_V512;
3169 let Predicates = [prd, HasVLX] in {
3170 defm Z256 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info256, _.info256.LdFrag,
3171 masked_load_unaligned, NoRMPattern,
3172 SelectOprr>, EVEX_V256;
3173 defm Z128 : avx512_load<opc, OpcodeStr, SSE_MOVU, _.info128, _.info128.LdFrag,
3174 masked_load_unaligned, NoRMPattern,
3175 SelectOprr>, EVEX_V128;
// One full set of store forms for a single vector width: the register-form
// "reversed" encodings (rr_REV/rrk_REV/rrkz_REV, MRMDestReg with a ".s"
// mnemonic suffix, used only for disassembly/round-tripping — note the empty
// pattern lists and FoldGenData linkage back to the load-form names), the
// plain memory store (mr), and the masked memory store (mrk).  A final Pat
// maps the masked-store DAG node onto the mrk instruction.
3179 multiclass avx512_store<bits<8> opc, string OpcodeStr, MoveLoadStoreItins itins,
3180 X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3181 string Name, bit NoMRPattern = 0> {
// hasSideEffects = 0: the _REV forms have no patterns, so tell the backend
// they are still side-effect free.
3182 let hasSideEffects = 0 in {
3183 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3184 OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
3185 [], _.ExeDomain, itins.rr>, EVEX, FoldGenData<Name#rr>,
3187 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3188 (ins _.KRCWM:$mask, _.RC:$src),
3189 OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
3190 "${dst} {${mask}}, $src}",
3191 [], _.ExeDomain, itins.rr>, EVEX, EVEX_K,
3192 FoldGenData<Name#rrk>, Sched<[WriteMove]>;
3193 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3194 (ins _.KRCWM:$mask, _.RC:$src),
3195 OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
3196 "${dst} {${mask}} {z}, $src}",
3197 [], _.ExeDomain, itins.rr>, EVEX, EVEX_KZ,
3198 FoldGenData<Name#rrkz>, Sched<[WriteMove]>;
// Plain store; pattern suppressed when NoMRPattern is set (callers that
// select the store through other means, e.g. the BWI byte/word moves).
3201 let hasSideEffects = 0, mayStore = 1 in
3202 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3203 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3204 !if(NoMRPattern, [],
3205 [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3206 _.ExeDomain, itins.mr>, EVEX, Sched<[WriteStore]>;
// Merge-masked store: only elements with mask bit set are written to memory.
3207 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3208 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3209 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3210 [], _.ExeDomain, itins.mr>, EVEX, EVEX_K, Sched<[WriteStore]>;
// Select the masked-store DAG node to the mrk instruction of this width.
3212 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3213 (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
3214 _.KRCWM:$mask, _.RC:$src)>;
// Unaligned vector stores across all AVX-512 vector lengths; mirrors
// avx512_load_vl.  `Name` is threaded through for FoldGenData so the _REV
// encodings can be tied back to their primary forms per width.
3218 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3219 AVX512VLVectorVTInfo _, Predicate prd,
3220 string Name, bit NoMRPattern = 0> {
3221 let Predicates = [prd] in
3222 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info512, store,
3223 masked_store_unaligned, Name#Z, NoMRPattern>, EVEX_V512;
3225 let Predicates = [prd, HasVLX] in {
3226 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info256, store,
3227 masked_store_unaligned, Name#Z256,
3228 NoMRPattern>, EVEX_V256;
3229 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVU, _.info128, store,
3230 masked_store_unaligned, Name#Z128,
3231 NoMRPattern>, EVEX_V128;
// Aligned vector stores across all AVX-512 vector lengths; uses the
// alignedstore fragment and per-width aligned masked-store PatFrags.
// NOTE(review): original line 3237 (presumably `Predicate prd, string Name> {`
// continuation) is absent from this listing — verify before editing.
3235 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3236 AVX512VLVectorVTInfo _, Predicate prd,
3238 let Predicates = [prd] in
3239 defm Z : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info512, alignedstore,
3240 masked_store_aligned512, Name#Z>, EVEX_V512;
3242 let Predicates = [prd, HasVLX] in {
3243 defm Z256 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info256, alignedstore,
3244 masked_store_aligned256, Name#Z256>, EVEX_V256;
3245 defm Z128 : avx512_store<opc, OpcodeStr, SSE_MOVA, _.info128, alignedstore,
3246 masked_store_aligned128, Name#Z128>, EVEX_V128;
// Concrete move instructions, instantiated from the load/store multiclasses:
//  - VMOVAPS/VMOVAPD:     aligned FP moves (opcodes 0x28 load / 0x29 store).
//  - VMOVUPS/VMOVUPD:     unaligned FP moves (0x10 / 0x11).
//  - VMOVDQA32/64:        aligned integer moves (0x6F / 0x7F), PD prefix.
//  - VMOVDQU8/16/32/64:   unaligned integer moves; the 8/16-bit forms need
//                         HasBWI and pass NoRMPattern/NoMRPattern = 1 (trailing
//                         `1` args) so plain loads/stores select the 32-bit
//                         forms instead.
// EVEX_CD8<size, CD8VF> sets the compressed disp8 scaling per element size;
// VEX_W selects the 64-bit element forms.
3250 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3252 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3253 HasAVX512, "VMOVAPS">,
3254 PS, EVEX_CD8<32, CD8VF>;
3256 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3258 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3259 HasAVX512, "VMOVAPD">,
3260 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3262 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3264 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3266 PS, EVEX_CD8<32, CD8VF>;
3268 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3270 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3272 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3274 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3276 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3277 HasAVX512, "VMOVDQA32">,
3278 PD, EVEX_CD8<32, CD8VF>;
3280 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3282 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3283 HasAVX512, "VMOVDQA64">,
3284 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3286 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
3287 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
3288 HasBWI, "VMOVDQU8", 1>,
3289 XD, EVEX_CD8<8, CD8VF>;
3291 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
3292 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
3293 HasBWI, "VMOVDQU16", 1>,
3294 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3296 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3298 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
3299 HasAVX512, "VMOVDQU32">,
3300 XS, EVEX_CD8<32, CD8VF>;
3302 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3304 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
3305 HasAVX512, "VMOVDQU64">,
3306 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3308 // Special instructions to help with spilling when we don't have VLX. We need
3309 // to load or store from a ZMM register instead. These are converted in
3310 // expandPostRAPseudos.
// Load-side pseudos: no encoding (opcode 0, Pseudo), no patterns; register
// allocation may rematerialize or fold them as loads.
3311 let isReMaterializable = 1, canFoldAsLoad = 1,
3312 isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
3313 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3314 "", [], IIC_SSE_MOVA_P_RM>;
3315 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3316 "", [], IIC_SSE_MOVA_P_RM>;
3317 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3318 "", [], IIC_SSE_MOVA_P_RM>;
3319 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3320 "", [], IIC_SSE_MOVA_P_RM>;
// Store-side pseudos, same idea in the opposite direction.
3323 let isPseudo = 1, SchedRW = [WriteStore], mayStore = 1, hasSideEffects = 0 in {
3324 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3325 "", [], IIC_SSE_MOVA_P_MR>;
3326 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3327 "", [], IIC_SSE_MOVA_P_MR>;
3328 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3329 "", [], IIC_SSE_MOVA_P_MR>;
3330 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3331 "", [], IIC_SSE_MOVA_P_MR>;
// Select "mask ? 0 : src" as a zero-masked move with an inverted (KNOT)
// mask: vselect with the zero vector in the true position becomes
// VMOVDQA*Zrrkz on ~mask.
// NOTE(review): original lines 3337-3338 are absent here (the v8i64 pattern's
// result is cut off mid-expression) — verify before editing.
3334 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3335 (v8i64 VR512:$src))),
3336 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3339 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3340 (v16i32 VR512:$src))),
3341 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3343 // These patterns exist to prevent the above patterns from introducing a second
3344 // mask inversion when one already exists.
// If the source mask is already an XOR-with-all-ones, fold the inversion
// away and use the original mask directly.
3345 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3346 (bc_v8i64 (v16i32 immAllZerosV)),
3347 (v8i64 VR512:$src))),
3348 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3349 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3350 (v16i32 immAllZerosV),
3351 (v16i32 VR512:$src))),
3352 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lower a narrow (128/256-bit) masked select by widening to a 512-bit masked
// move and operating on the inserted subregister: the narrow operands are
// INSERT_SUBREG'd into an IMPLICIT_DEF wide register, the narrow mask is
// copied into the wide mask class, and the wide rrk/rrkz instruction is used.
// NOTE(review): original lines 3358-3359, 3364-3365, 3368-3369, 3373-3375 are
// absent here (likely the EXTRACT_SUBREG of the wide result and closing
// parens/braces) — the visible result dags are incomplete; verify before editing.
3354 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3355 X86VectorVTInfo Wide> {
// Merge-masked form: mask ? src1 : src0.
3356 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3357 Narrow.RC:$src1, Narrow.RC:$src0)),
3360 (!cast<Instruction>(InstrStr#"rrk")
3361 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3362 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3363 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zero-masked form: mask ? src1 : 0.
3366 def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3367 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3370 (!cast<Instruction>(InstrStr#"rrkz")
3371 (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3372 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3376 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3377 // available. Use a 512-bit operation and extract.
// NOTE(review): the closing brace of this `let` (original lines 3381-3382) is
// absent from this listing.
3378 let Predicates = [HasAVX512, NoVLX] in {
3379 defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3380 defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
// Stores of i16/i8 vectors have no dedicated instruction outside BWI; select
// them through the 32-bit integer moves (same bytes in memory).  First the
// 512-bit forms under plain HasAVX512, then the 128/256-bit forms under
// HasVLX.
3383 let Predicates = [HasAVX512] in {
3385 def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3386 (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3387 def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3388 (VMOVDQA32Zmr addr:$dst, VR512:$src)>;
3389 def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3390 (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
3391 def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3392 (VMOVDQU32Zmr addr:$dst, VR512:$src)>;
3395 let Predicates = [HasVLX] in {
3397 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3398 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3399 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3400 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3401 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3402 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3403 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3404 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3407 def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3408 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3409 def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3410 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3411 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3412 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3413 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3414 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
// Turn "select(mask, extract_subvector(src, 0), other)" — viewed through a
// bitcast as type Cast — into a masked register move on the extracted
// subregister: merge-masked (rrk) against $src0, and zero-masked (rrkz)
// against the zero vector.
// NOTE(review): original lines 3420, 3423, 3427, 3429, 3434, 3436-3438 are
// absent from this listing (bitconvert wrappers / $src0 operand / closing
// braces, presumably) — the visible dags are incomplete; verify before editing.
3417 multiclass masked_move_for_extract<string InstrStr, X86VectorVTInfo From,
3418 X86VectorVTInfo To, X86VectorVTInfo Cast> {
3419 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3421 (To.VT (extract_subvector
3422 (From.VT From.RC:$src), (iPTR 0)))),
3424 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
3425 Cast.RC:$src0, Cast.KRCWM:$mask,
3426 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
3428 def : Pat<(Cast.VT (vselect Cast.KRCWM:$mask,
3430 (To.VT (extract_subvector
3431 (From.VT From.RC:$src), (iPTR 0)))),
3432 Cast.ImmAllZerosV)),
3433 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
3435 (EXTRACT_SUBREG From.RC:$src, To.SubRegIdx)))>;
// Instantiations of masked_move_for_extract for every (source width,
// extracted width, mask granularity) combination, grouped as:
// 256->128, 512->128, and 512->256 extracts, for both 32- and 64-bit
// element masking and for integer (VMOVDQA) and FP (VMOVAPS/VMOVAPD) domains.
3439 let Predicates = [HasVLX] in {
3440 // A masked extract from the first 128-bits of a 256-bit vector can be
3441 // implemented with masked move.
3442 defm : masked_move_for_extract<"VMOVDQA64Z128", v4i64x_info, v2i64x_info, v2i64x_info>;
3443 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i32x_info, v4i32x_info, v2i64x_info>;
3444 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i16x_info, v8i16x_info, v2i64x_info>;
3445 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i8x_info, v16i8x_info, v2i64x_info>;
3446 defm : masked_move_for_extract<"VMOVDQA32Z128", v4i64x_info, v2i64x_info, v4i32x_info>;
3447 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i32x_info, v4i32x_info, v4i32x_info>;
3448 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i16x_info, v8i16x_info, v4i32x_info>;
3449 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i8x_info, v16i8x_info, v4i32x_info>;
3450 defm : masked_move_for_extract<"VMOVAPDZ128", v4f64x_info, v2f64x_info, v2f64x_info>;
3451 defm : masked_move_for_extract<"VMOVAPDZ128", v8f32x_info, v4f32x_info, v2f64x_info>;
3452 defm : masked_move_for_extract<"VMOVAPSZ128", v4f64x_info, v2f64x_info, v4f32x_info>;
3453 defm : masked_move_for_extract<"VMOVAPSZ128", v8f32x_info, v4f32x_info, v4f32x_info>;
3455 // A masked extract from the first 128-bits of a 512-bit vector can be
3456 // implemented with masked move.
3457 defm : masked_move_for_extract<"VMOVDQA64Z128", v8i64_info, v2i64x_info, v2i64x_info>;
3458 defm : masked_move_for_extract<"VMOVDQA64Z128", v16i32_info, v4i32x_info, v2i64x_info>;
3459 defm : masked_move_for_extract<"VMOVDQA64Z128", v32i16_info, v8i16x_info, v2i64x_info>;
3460 defm : masked_move_for_extract<"VMOVDQA64Z128", v64i8_info, v16i8x_info, v2i64x_info>;
3461 defm : masked_move_for_extract<"VMOVDQA32Z128", v8i64_info, v2i64x_info, v4i32x_info>;
3462 defm : masked_move_for_extract<"VMOVDQA32Z128", v16i32_info, v4i32x_info, v4i32x_info>;
3463 defm : masked_move_for_extract<"VMOVDQA32Z128", v32i16_info, v8i16x_info, v4i32x_info>;
3464 defm : masked_move_for_extract<"VMOVDQA32Z128", v64i8_info, v16i8x_info, v4i32x_info>;
3465 defm : masked_move_for_extract<"VMOVAPDZ128", v8f64_info, v2f64x_info, v2f64x_info>;
3466 defm : masked_move_for_extract<"VMOVAPDZ128", v16f32_info, v4f32x_info, v2f64x_info>;
3467 defm : masked_move_for_extract<"VMOVAPSZ128", v8f64_info, v2f64x_info, v4f32x_info>;
3468 defm : masked_move_for_extract<"VMOVAPSZ128", v16f32_info, v4f32x_info, v4f32x_info>;
3470 // A masked extract from the first 256-bits of a 512-bit vector can be
3471 // implemented with masked move.
3472 defm : masked_move_for_extract<"VMOVDQA64Z256", v8i64_info, v4i64x_info, v4i64x_info>;
3473 defm : masked_move_for_extract<"VMOVDQA64Z256", v16i32_info, v8i32x_info, v4i64x_info>;
3474 defm : masked_move_for_extract<"VMOVDQA64Z256", v32i16_info, v16i16x_info, v4i64x_info>;
3475 defm : masked_move_for_extract<"VMOVDQA64Z256", v64i8_info, v32i8x_info, v4i64x_info>;
3476 defm : masked_move_for_extract<"VMOVDQA32Z256", v8i64_info, v4i64x_info, v8i32x_info>;
3477 defm : masked_move_for_extract<"VMOVDQA32Z256", v16i32_info, v8i32x_info, v8i32x_info>;
3478 defm : masked_move_for_extract<"VMOVDQA32Z256", v32i16_info, v16i16x_info, v8i32x_info>;
3479 defm : masked_move_for_extract<"VMOVDQA32Z256", v64i8_info, v32i8x_info, v8i32x_info>;
3480 defm : masked_move_for_extract<"VMOVAPDZ256", v8f64_info, v4f64x_info, v4f64x_info>;
3481 defm : masked_move_for_extract<"VMOVAPDZ256", v16f32_info, v8f32x_info, v4f64x_info>;
3482 defm : masked_move_for_extract<"VMOVAPSZ256", v8f64_info, v4f64x_info, v8f32x_info>;
3483 defm : masked_move_for_extract<"VMOVAPSZ256", v16f32_info, v8f32x_info, v8f32x_info>;
3486 // Move Int Doubleword to Packed Double Int
// GPR <-> XMM moves: vmovd (GR32 -> low dword of XMM) and vmovq (GR64 ->
// low qword of XMM), plus the isCodeGenOnly FR64X bitcast forms used when
// an f64 lives in a GPR or vice versa.
3488 let ExeDomain = SSEPackedInt in {
3489 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3490 "vmovd\t{$src, $dst|$dst, $src}",
3492 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
3493 EVEX, Sched<[WriteMove]>;
3494 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3495 "vmovd\t{$src, $dst|$dst, $src}",
3497 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
3498 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3499 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3500 "vmovq\t{$src, $dst|$dst, $src}",
3502 (v2i64 (scalar_to_vector GR64:$src)))],
3503 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// Load form exists for disassembly only (no pattern, ForceDisassemble).
3504 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3505 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3507 "vmovq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3508 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteLoad]>;
3509 let isCodeGenOnly = 1 in {
3510 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3511 "vmovq\t{$src, $dst|$dst, $src}",
3512 [(set FR64X:$dst, (bitconvert GR64:$src))],
3513 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// NOTE(review): EVEX_CD8<8, CD8VT8> here differs from the <64, CD8VT1> used
// on the other 64-bit forms; both yield an 8-byte disp8 scale, but confirm
// this is intentional rather than a copy-paste from the XS vmovq load.
3514 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3515 "vmovq\t{$src, $dst|$dst, $src}",
3516 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3517 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3518 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3519 "vmovq\t{$src, $dst|$dst, $src}",
3520 [(set GR64:$dst, (bitconvert FR64X:$src))],
3521 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
3522 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3523 "vmovq\t{$src, $dst|$dst, $src}",
3524 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
3525 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
3526 EVEX_CD8<64, CD8VT1>;
3528 } // ExeDomain = SSEPackedInt
3530 // Move Int Doubleword to Single Scalar
// isCodeGenOnly bitcast forms: GR32 / i32 memory -> FR32X via vmovd.
3532 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3533 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3534 "vmovd\t{$src, $dst|$dst, $src}",
3535 [(set FR32X:$dst, (bitconvert GR32:$src))],
3536 IIC_SSE_MOVDQ>, EVEX, Sched<[WriteMove]>;
3538 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3539 "vmovd\t{$src, $dst|$dst, $src}",
3540 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
3541 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteLoad]>;
3542 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3544 // Move doubleword from xmm register to r/m32
// Extract element 0 of a v4i32 to a GR32 register or an i32 memory location.
3546 let ExeDomain = SSEPackedInt in {
3547 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3548 "vmovd\t{$src, $dst|$dst, $src}",
3549 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3550 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
3551 EVEX, Sched<[WriteMove]>;
3552 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3553 (ins i32mem:$dst, VR128X:$src),
3554 "vmovd\t{$src, $dst|$dst, $src}",
3555 [(store (i32 (extractelt (v4i32 VR128X:$src),
3556 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
3557 EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3558 } // ExeDomain = SSEPackedInt
3560 // Move quadword from xmm1 register to r/m64
// Extract element 0 of a v2i64 to GR64 or memory; all forms require 64-bit
// mode (the 0x7E PD form needs REX.W/VEX_W semantics).
3562 let ExeDomain = SSEPackedInt in {
3563 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3564 "vmovq\t{$src, $dst|$dst, $src}",
3565 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3567 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteMove]>,
3568 Requires<[HasAVX512, In64BitMode]>;
// Store encoding of 0x7E exists for disassembly only (no pattern).
3570 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3571 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3572 "vmovq\t{$src, $dst|$dst, $src}",
3573 [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Sched<[WriteStore]>,
3574 Requires<[HasAVX512, In64BitMode]>;
// The 0xD6 encoding is the one used for actual stores of the low qword.
3576 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3577 (ins i64mem:$dst, VR128X:$src),
3578 "vmovq\t{$src, $dst|$dst, $src}",
3579 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3580 addr:$dst)], IIC_SSE_MOVDQ>,
3581 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3582 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
// Register-register "reversed" form, disassembles as vmovq.s.
3584 let hasSideEffects = 0 in
3585 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3587 "vmovq.s\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>,
3588 EVEX, VEX_W, Sched<[WriteMove]>;
3589 } // ExeDomain = SSEPackedInt
3591 // Move Scalar Single to Double Int
// isCodeGenOnly bitcast forms: FR32X -> GR32 register, and FR32X -> i32
// memory, via vmovd.
3593 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3594 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3596 "vmovd\t{$src, $dst|$dst, $src}",
3597 [(set GR32:$dst, (bitconvert FR32X:$src))],
3598 IIC_SSE_MOVD_ToGP>, EVEX, Sched<[WriteMove]>;
3599 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3600 (ins i32mem:$dst, FR32X:$src),
3601 "vmovd\t{$src, $dst|$dst, $src}",
3602 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
3603 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteStore]>;
3604 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3606 // Move Quadword Int to Packed Quadword Int
// Load an i64 from memory into the low qword of an XMM register.
// NOTE(review): EVEX_CD8<8, CD8VT8> gives the same 8-byte disp8 scale as
// <64, CD8VT1>; confirm the choice is intentional for this XS-prefixed form.
3608 let ExeDomain = SSEPackedInt in {
3609 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3611 "vmovq\t{$src, $dst|$dst, $src}",
3613 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3614 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteLoad]>;
3615 } // ExeDomain = SSEPackedInt
3617 //===----------------------------------------------------------------------===//
3618 // AVX-512 MOVSS, MOVSD
3619 //===----------------------------------------------------------------------===//
// Scalar move (vmovss/vmovsd) forms: unmasked, zero-masked (rrkz), and
// merge-masked (rrk) register-register variants built on X86selects; scalar
// FRC load/store forms; and masked load/store forms (rmk/rmkz/mrk) that have
// no patterns and are selected via the lowering multiclasses further below.
3621 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3622 X86VectorVTInfo _> {
3623 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3624 (ins _.RC:$src1, _.RC:$src2),
3625 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3626 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3627 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, Sched<[WriteMove]>;
// Zero-masked: mask ? OpNode(src1, src2) : 0 (per element 0).
// NOTE(review): original line 3634 (the zero-vector operand of X86selects)
// is absent from this listing.
3628 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3629 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3630 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3631 "$dst {${mask}} {z}, $src1, $src2}"),
3632 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3633 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3635 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ, Sched<[WriteMove]>;
// Merge-masked: falls back to $src0, which is tied to $dst.
3636 let Constraints = "$src0 = $dst" in
3637 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3638 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3639 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3640 "$dst {${mask}}, $src1, $src2}"),
3641 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3642 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3643 (_.VT _.RC:$src0))))],
3644 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K, Sched<[WriteMove]>;
// Scalar load into the FR register class.
3645 let canFoldAsLoad = 1, isReMaterializable = 1 in
3646 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3647 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3648 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3649 _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, Sched<[WriteLoad]>;
// Masked loads: no patterns here; matched through the lowering multiclasses.
3650 let mayLoad = 1, hasSideEffects = 0 in {
3651 let Constraints = "$src0 = $dst" in
3652 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3653 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3654 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3655 "$dst {${mask}}, $src}"),
3656 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K, Sched<[WriteLoad]>;
3657 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3658 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3659 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3660 "$dst {${mask}} {z}, $src}"),
3661 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ, Sched<[WriteLoad]>;
// Scalar store, plus the masked store form (pattern-less, matched below).
3663 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3664 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3665 [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
3666 EVEX, Sched<[WriteStore]>;
3667 let mayStore = 1, hasSideEffects = 0 in
3668 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3669 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3670 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3671 [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K, Sched<[WriteStore]>;
// Instantiate the scalar moves: vmovss (f32, XS prefix) and vmovsd (f64,
// XD prefix + VEX_W).  VEX_LIG: the encoded vector length is ignored.
3674 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3675 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3677 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3678 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Match a scalar select whose mask arrives as the low bit of a GR32
// (trunc-to-i8, and-with-1) and lower it to the masked rrk/rrkz move,
// copying the GR32 into VK1WM.
// NOTE(review): original lines 3692 and 3702 are absent from this listing
// (an operand of the rrk/rrkz result dags, presumably the $src0 copy) —
// verify before editing.
3681 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3682 PatLeaf ZeroFP, X86VectorVTInfo _> {
// Merge form: false-value is a register (src2).
3684 def : Pat<(_.VT (OpNode _.RC:$src0,
3685 (_.VT (scalar_to_vector
3686 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3687 (_.EltVT _.FRC:$src1),
3688 (_.EltVT _.FRC:$src2))))))),
3689 (!cast<Instruction>(InstrStr#rrk)
3690 (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
3691 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3693 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
// Zero form: false-value is the FP zero immediate -> zero-masked move.
3695 def : Pat<(_.VT (OpNode _.RC:$src0,
3696 (_.VT (scalar_to_vector
3697 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3698 (_.EltVT _.FRC:$src1),
3699 (_.EltVT ZeroFP))))))),
3700 (!cast<Instruction>(InstrStr#rrkz)
3701 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3703 (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
// Lower a masked_store of a 128-bit value (viewed as the low part of a
// 512-bit insert chain) to the scalar masked store (mrk), converting the
// caller-supplied Mask dag into VK1WM.
// NOTE(review): original lines 3708, 3713-3714, 3718-3720 are absent here
// (the insert_subvector indices and closing braces, presumably).
3706 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3707 dag Mask, RegisterClass MaskRC> {
3709 def : Pat<(masked_store addr:$dst, Mask,
3710 (_.info512.VT (insert_subvector undef,
3711 (_.info256.VT (insert_subvector undef,
3712 (_.info128.VT _.info128.RC:$src),
3715 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3716 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3717 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// As avx512_store_scalar_lowering, but the mask register is narrower than
// GR32, so it is first widened with INSERT_SUBREG into an IMPLICIT_DEF i32
// before the COPY_TO_REGCLASS to VK1WM.
// NOTE(review): original lines 3725, 3730-3731, 3735-3737 are absent here.
3721 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3722 AVX512VLVectorVTInfo _,
3723 dag Mask, RegisterClass MaskRC,
3724 SubRegIndex subreg> {
3726 def : Pat<(masked_store addr:$dst, Mask,
3727 (_.info512.VT (insert_subvector undef,
3728 (_.info256.VT (insert_subvector undef,
3729 (_.info128.VT _.info128.RC:$src),
3732 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3733 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3734 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Lower an extract of the low 128 bits of a 512-bit masked_load to the
// scalar masked load: the zero-passthru case maps to rmkz, and the
// X86vzmovl-passthru case maps to the merging rmk with $src as passthru.
// NOTE(review): original lines 3740, 3745, 3748-3749, 3755-3757, 3760-3763
// are absent here (extract indices, addr operands, closing braces).
3738 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3739 dag Mask, RegisterClass MaskRC> {
// Zero passthru -> zero-masked scalar load.
3741 def : Pat<(_.info128.VT (extract_subvector
3742 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3743 (_.info512.VT (bitconvert
3744 (v16i32 immAllZerosV))))),
3746 (!cast<Instruction>(InstrStr#rmkz)
3747 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Register passthru (vzmovl of $src) -> merging scalar load.
3750 def : Pat<(_.info128.VT (extract_subvector
3751 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3752 (_.info512.VT (insert_subvector undef,
3753 (_.info256.VT (insert_subvector undef,
3754 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3758 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3759 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// As avx512_load_scalar_lowering, but widening a sub-i32 mask register via
// INSERT_SUBREG before copying it into VK1WM (see the subreg store variant).
// NOTE(review): original lines 3768, 3773, 3776-3777, 3783-3785, 3788-3791
// are absent from this listing.
3764 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3765 AVX512VLVectorVTInfo _,
3766 dag Mask, RegisterClass MaskRC,
3767 SubRegIndex subreg> {
3769 def : Pat<(_.info128.VT (extract_subvector
3770 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3771 (_.info512.VT (bitconvert
3772 (v16i32 immAllZerosV))))),
3774 (!cast<Instruction>(InstrStr#rmkz)
3775 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3778 def : Pat<(_.info128.VT (extract_subvector
3779 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3780 (_.info512.VT (insert_subvector undef,
3781 (_.info256.VT (insert_subvector undef,
3782 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3786 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3787 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar-select and masked scalar load/store lowerings for
// vmovss (f32, 16-bit mask viewed as v16i1) and vmovsd (f64, 8-bit mask
// viewed as v8i1), for GR32 (truncated), GR16, and GR8 mask sources.
3792 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3793 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
3795 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3796 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3797 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3798 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3799 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3800 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
3802 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3803 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3804 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3805 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3806 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3807 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// Scalar FP selects on an i1-in-GR8 or a VK1WM mask: route through the
// masked vmovss/vmovsd rrk forms by copying the scalar operands to VR128X,
// selecting, then copying the result back to the scalar register class.
// NOTE(review): original lines 3811, 3816-3817, 3825, 3830-3831 are absent
// here (a COPY_TO_REGCLASS wrapper around each rrk result, presumably).
3809 def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3810 (f32 FR32X:$src1), (f32 FR32X:$src2))),
3812 (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3813 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3814 GR8:$mask, sub_8bit)), VK1WM),
3815 (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
3818 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
3819 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3820 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
3821 (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
3823 def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
3824 (f64 FR64X:$src1), (f64 FR64X:$src2))),
3826 (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3827 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
3828 GR8:$mask, sub_8bit)), VK1WM),
3829 (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
3832 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
3833 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3834 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
3835 (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
// Intrinsic form of the masked scalar store.
3837 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3838 (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
3839 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
// "Reversed" (MRMDestReg, opcode 0x11) register-register encodings of
// vmovss/vmovsd, printed with a ".s" suffix.  Pattern-less; FoldGenData ties
// each to its primary 0x10 form for encoding-preference tables.
3841 let hasSideEffects = 0 in {
3842 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3843 (ins VR128X:$src1, VR128X:$src2),
3844 "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3845 [], IIC_SSE_MOV_S_RR>, XS, EVEX_4V, VEX_LIG,
3846 FoldGenData<"VMOVSSZrr">, Sched<[WriteMove]>;
3848 let Constraints = "$src0 = $dst" in
3849 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3850 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
3851 VR128X:$src1, VR128X:$src2),
3852 "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
3853 "$dst {${mask}}, $src1, $src2}",
3854 [], IIC_SSE_MOV_S_RR>, EVEX_K, XS, EVEX_4V, VEX_LIG,
3855 FoldGenData<"VMOVSSZrrk">, Sched<[WriteMove]>;
3857 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3858 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
3859 "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3860 "$dst {${mask}} {z}, $src1, $src2}",
3861 [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
3862 FoldGenData<"VMOVSSZrrkz">, Sched<[WriteMove]>;
3864 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3865 (ins VR128X:$src1, VR128X:$src2),
3866 "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3867 [], IIC_SSE_MOV_S_RR>, XD, EVEX_4V, VEX_LIG, VEX_W,
3868 FoldGenData<"VMOVSDZrr">, Sched<[WriteMove]>;
3870 let Constraints = "$src0 = $dst" in
3871 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3872 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
3873 VR128X:$src1, VR128X:$src2),
3874 "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
3875 "$dst {${mask}}, $src1, $src2}",
3876 [], IIC_SSE_MOV_S_RR>, EVEX_K, XD, EVEX_4V, VEX_LIG,
3877 VEX_W, FoldGenData<"VMOVSDZrrk">, Sched<[WriteMove]>;
3879 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3880 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
3882 "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3883 "$dst {${mask}} {z}, $src1, $src2}",
3884 [], IIC_SSE_MOV_S_RR>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
3885 VEX_W, FoldGenData<"VMOVSDZrrkz">, Sched<[WriteMove]>;
// Selection patterns mapping move-low-element-and-zero-upper (X86vzmovl),
// zero-extending loads (X86vzload) and scalar-shuffle nodes onto the AVX-512
// VMOVSS/VMOVSD instructions. AddedComplexity biases these over more generic
// patterns.
3888 let Predicates = [HasAVX512] in {
3889 let AddedComplexity = 15 in {
// vzmovl of a 128-bit vector: blend the low element over an all-zero vector.
3890 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
3891 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
3892 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
3893 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
3894 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
3895 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
// NOTE(review): this COPY_TO_REGCLASS targets VR128, while the sibling
// patterns in this section (e.g. the X86Movss/X86Movsd ones below) use
// VR128X. Verify the narrower class is intentional here.
3896 (COPY_TO_REGCLASS FR64X:$src, VR128))>;
3899 // Move low f32 and clear high bits.
// For 256/512-bit results: do the blend on the low 128-bit lane, then
// SUBREG_TO_REG asserts the upper lanes are zero.
3900 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
3901 (SUBREG_TO_REG (i32 0),
3902 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3903 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
3904 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
3905 (SUBREG_TO_REG (i32 0),
3906 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3907 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
3908 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
3909 (SUBREG_TO_REG (i32 0),
3910 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
3911 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
3912 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
3913 (SUBREG_TO_REG (i32 0),
3914 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
3915 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
3917 let AddedComplexity = 20 in {
3918 // MOVSSrm zeros the high parts of the register; represent this
3919 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3920 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
3921 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3922 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
3923 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3924 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
3925 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3926 def : Pat<(v4f32 (X86vzload addr:$src)),
3927 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
3929 // MOVSDrm zeros the high parts of the register; represent this
3930 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
3931 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
3932 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3933 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
3934 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3935 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
3936 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3937 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
3938 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3939 def : Pat<(v2f64 (X86vzload addr:$src)),
3940 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
3942 // Represent the same patterns above but in the form they appear for
// ...256-bit types (scalar load zero-extended into a YMM register).
3944 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
3945 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3946 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3947 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
3948 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3949 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3950 def : Pat<(v8f32 (X86vzload addr:$src)),
3951 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3952 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
3953 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3954 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3955 def : Pat<(v4f64 (X86vzload addr:$src)),
3956 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3958 // Represent the same patterns above but in the form they appear for
// ...512-bit types (scalar load zero-extended into a ZMM register).
3960 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
3961 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
3962 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
3963 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
3964 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
3965 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3966 def : Pat<(v16f32 (X86vzload addr:$src)),
3967 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
3968 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
3969 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
3970 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3971 def : Pat<(v8f64 (X86vzload addr:$src)),
3972 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
3974 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
3975 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
3976 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
3978 // Move low f64 and clear high bits.
3979 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
3980 (SUBREG_TO_REG (i32 0),
3981 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3982 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
3983 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
3984 (SUBREG_TO_REG (i32 0),
3985 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
3986 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
// Same lowering for the i64 element types.
3988 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
3989 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
3990 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
3991 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
3992 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
3993 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
3995 // Extract and store.
3996 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
3998 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4000 // Shuffle with VMOVSS
4001 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4002 (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
4004 def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
4005 (VMOVSSZrr VR128X:$src1,
4006 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
4008 // Shuffle with VMOVSD
4009 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4010 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4012 def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
4013 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
// MOVLPD/MOVLPS register forms are equivalent to VMOVSD here.
4015 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4016 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
4017 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4018 (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
// VMOVQ xmm, xmm: moves the low 64 bits and zeroes the upper bits, matching
// the X86vzmovl(v2i64) node directly.
4021 let AddedComplexity = 15 in
4022 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
// NOTE(review): the "(ins VR128X:$src)," line appears to be missing from
// this excerpt — confirm against the upstream file.
4024 "vmovq\t{$src, $dst|$dst, $src}",
4025 [(set VR128X:$dst, (v2i64 (X86vzmovl
4026 (v2i64 VR128X:$src))))],
4027 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
// Patterns mapping scalar->vector zero-extending moves (GPR and memory
// sources) onto VMOVD/VMOVQ, for 128/256/512-bit result types.
4029 let Predicates = [HasAVX512] in {
4030 let AddedComplexity = 15 in {
// GPR -> low vector element with upper elements zeroed.
4031 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4032 (VMOVDI2PDIZrr GR32:$src)>;
4034 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4035 (VMOV64toPQIZrr GR64:$src)>;
// 256/512-bit forms: zero-extend in xmm, SUBREG_TO_REG asserts zero uppers.
4037 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4038 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4039 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4041 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4042 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4043 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4045 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4046 let AddedComplexity = 20 in {
// Memory-source forms: a 32/64-bit load zero-extended into the vector.
4047 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4048 (VMOVDI2PDIZrm addr:$src)>;
4049 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4050 (VMOVDI2PDIZrm addr:$src)>;
4051 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4052 (VMOVDI2PDIZrm addr:$src)>;
4053 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4054 (VMOVDI2PDIZrm addr:$src)>;
4055 def : Pat<(v4i32 (X86vzload addr:$src)),
4056 (VMOVDI2PDIZrm addr:$src)>;
4057 def : Pat<(v8i32 (X86vzload addr:$src)),
4058 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4059 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4060 (VMOVQI2PQIZrm addr:$src)>;
// vzmovl of an f64 vector register uses the zeroing VMOVQ defined above.
4061 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4062 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4063 def : Pat<(v2i64 (X86vzload addr:$src)),
4064 (VMOVQI2PQIZrm addr:$src)>;
4065 def : Pat<(v4i64 (X86vzload addr:$src)),
4066 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4069 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
4070 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4071 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4072 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4073 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4074 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4075 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4077 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4078 def : Pat<(v16i32 (X86vzload addr:$src)),
4079 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4080 def : Pat<(v8i64 (X86vzload addr:$src)),
4081 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4083 //===----------------------------------------------------------------------===//
4084 // AVX-512 - Non-temporals
4085 //===----------------------------------------------------------------------===//
// Non-temporal aligned vector loads (VMOVNTDQA) for 512/256/128-bit widths.
// The 256/128-bit forms require VLX. No selection patterns here ([]); the
// alignednontemporalload patterns further below select these instructions.
4086 let SchedRW = [WriteLoad] in {
4087 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4088 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4089 [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
4090 EVEX_CD8<64, CD8VF>;
4092 let Predicates = [HasVLX] in {
4093 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
// NOTE(review): the "(ins i256mem:$src)," line appears to be missing from
// this excerpt — confirm against the upstream file.
4095 "vmovntdqa\t{$src, $dst|$dst, $src}",
4096 [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
4097 EVEX_CD8<64, CD8VF>;
4099 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
// NOTE(review): the "(ins i128mem:$src)," line appears to be missing from
// this excerpt — confirm against the upstream file.
4101 "vmovntdqa\t{$src, $dst|$dst, $src}",
4102 [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
4103 EVEX_CD8<64, CD8VF>;
// Defines one non-temporal store instruction (mr form) for vector info `_`.
// st_frag defaults to alignednontemporalstore; AddedComplexity = 400 makes
// the NT store win over ordinary store patterns.
4107 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4108 PatFrag st_frag = alignednontemporalstore,
4109 InstrItinClass itin = IIC_SSE_MOVNT> {
4110 let SchedRW = [WriteStore], AddedComplexity = 400 in
4111 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4112 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4113 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4114 _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit,
// HasAVX512) and Z256/Z128 (gated additionally on HasVLX).
4117 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4118 AVX512VLVectorVTInfo VTInfo> {
4119 let Predicates = [HasAVX512] in
4120 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
4122 let Predicates = [HasAVX512, HasVLX] in {
4123 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4124 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
// Non-temporal store instantiations: integer (DQ), f64 (PD, VEX_W) and
// f32 (PS) element flavors.
4128 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4129 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4130 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
// 512-bit NT patterns: route other element types of NT stores to VMOVNTDQ,
// and all aligned NT loads (including FP types) to VMOVNTDQA.
4132 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4133 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4134 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4135 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4136 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4137 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4138 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4140 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4141 (VMOVNTDQAZrm addr:$src)>;
4142 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4143 (VMOVNTDQAZrm addr:$src)>;
4144 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4145 (VMOVNTDQAZrm addr:$src)>;
// Same NT store/load routing for the 256- and 128-bit forms, gated on VLX.
4148 let Predicates = [HasVLX], AddedComplexity = 400 in {
4149 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4150 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4151 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4152 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4153 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4154 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4156 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4157 (VMOVNTDQAZ256rm addr:$src)>;
4158 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4159 (VMOVNTDQAZ256rm addr:$src)>;
4160 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4161 (VMOVNTDQAZ256rm addr:$src)>;
4163 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4164 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4165 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4166 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4167 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4168 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4170 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4171 (VMOVNTDQAZ128rm addr:$src)>;
4172 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4173 (VMOVNTDQAZ128rm addr:$src)>;
4174 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4175 (VMOVNTDQAZ128rm addr:$src)>;
4178 //===----------------------------------------------------------------------===//
4179 // AVX-512 - Integer arithmetic
// Base integer binop multiclass: register-register (rr) and
// register-memory (rm) maskable forms for OpNode over vector info `_`.
// IsCommutable marks the rr form for operand commuting.
4181 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4182 X86VectorVTInfo _, OpndItins itins,
4183 bit IsCommutable = 0> {
4184 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4185 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4186 "$src2, $src1", "$src1, $src2",
4187 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4188 itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4189 Sched<[itins.Sched]>;
// Memory form: second operand loaded and bitconverted to the vector type.
4191 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4192 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4193 "$src2, $src1", "$src1, $src2",
4194 (_.VT (OpNode _.RC:$src1,
4195 (bitconvert (_.LdFrag addr:$src2)))),
4196 itins.rm>, AVX512BIBase, EVEX_4V,
4197 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Extends avx512_binop_rm with a broadcast-from-scalar-memory (rmb, EVEX_B)
// form: the scalar is loaded, X86VBroadcast to the full vector, then fed to
// OpNode. (The X86VBroadcast line appears to be elided in this excerpt.)
4200 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4201 X86VectorVTInfo _, OpndItins itins,
4202 bit IsCommutable = 0> :
4203 avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
4204 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4205 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4206 "${src2}"##_.BroadcastStr##", $src1",
4207 "$src1, ${src2}"##_.BroadcastStr,
4208 (_.VT (OpNode _.RC:$src1,
4210 (_.ScalarLdFrag addr:$src2)))),
4211 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4212 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_binop_rm at 512 (prd) and 256/128 (prd + HasVLX).
4215 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4216 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4217 Predicate prd, bit IsCommutable = 0> {
4218 let Predicates = [prd] in
4219 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4220 IsCommutable>, EVEX_V512;
4222 let Predicates = [prd, HasVLX] in {
4223 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4224 IsCommutable>, EVEX_V256;
4225 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4226 IsCommutable>, EVEX_V128;
// Same as avx512_binop_rm_vl but with the broadcast (rmb) form included,
// for 32/64-bit element ops that support embedded broadcast.
4230 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4231 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4232 Predicate prd, bit IsCommutable = 0> {
4233 let Predicates = [prd] in
4234 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4235 IsCommutable>, EVEX_V512;
4237 let Predicates = [prd, HasVLX] in {
4238 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4239 IsCommutable>, EVEX_V256;
4240 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4241 IsCommutable>, EVEX_V128;
// Qword-element convenience wrapper: i64 info, VEX_W, broadcast supported.
4245 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4246 OpndItins itins, Predicate prd,
4247 bit IsCommutable = 0> {
4248 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4249 itins, prd, IsCommutable>,
4250 VEX_W, EVEX_CD8<64, CD8VF>;
// Dword-element convenience wrapper: i32 info, broadcast supported.
4253 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4254 OpndItins itins, Predicate prd,
4255 bit IsCommutable = 0> {
4256 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4257 itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// Word-element convenience wrapper: i16 info, no broadcast form (uses
// avx512_binop_rm_vl, not the rmb variant).
4260 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4261 OpndItins itins, Predicate prd,
4262 bit IsCommutable = 0> {
4263 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4264 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// Byte-element convenience wrapper: i8 info, no broadcast form.
4268 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4269 OpndItins itins, Predicate prd,
4270 bit IsCommutable = 0> {
4271 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4272 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Combined dword+qword instantiation: produces NAME#D and NAME#Q variants
// with "d"/"q" mnemonic suffixes.
4276 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4277 SDNode OpNode, OpndItins itins, Predicate prd,
4278 bit IsCommutable = 0> {
4279 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
4282 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
// Combined byte+word instantiation: produces NAME#B and NAME#W variants
// with "b"/"w" mnemonic suffixes.
4286 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4287 SDNode OpNode, OpndItins itins, Predicate prd,
4288 bit IsCommutable = 0> {
4289 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
4292 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
// All four element widths: d/q forms gated on HasAVX512, b/w forms on HasBWI.
4296 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4297 bits<8> opc_d, bits<8> opc_q,
4298 string OpcodeStr, SDNode OpNode,
4299 OpndItins itins, bit IsCommutable = 0> {
4300 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4301 itins, HasAVX512, IsCommutable>,
4302 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4303 itins, HasBWI, IsCommutable>;
// Binop whose source and destination vector types differ (e.g. pmuldq:
// i32 sources -> i64 result). _Brdct describes the broadcast element type
// used by the rmb form.
4306 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
4307 SDNode OpNode,X86VectorVTInfo _Src,
4308 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4309 bit IsCommutable = 0> {
4310 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4311 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4312 "$src2, $src1","$src1, $src2",
4314 (_Src.VT _Src.RC:$src1),
4315 (_Src.VT _Src.RC:$src2))),
4316 itins.rr, IsCommutable>,
4317 AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
4318 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4319 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4320 "$src2, $src1", "$src1, $src2",
4321 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4322 (bitconvert (_Src.LdFrag addr:$src2)))),
4323 itins.rm>, AVX512BIBase, EVEX_4V,
4324 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Embedded-broadcast form: broadcast a _Brdct scalar, bitconvert to the
// source vector type, then apply OpNode.
4326 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4327 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4329 "${src2}"##_Brdct.BroadcastStr##", $src1",
4330 "$src1, ${src2}"##_Brdct.BroadcastStr,
4331 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4332 (_Brdct.VT (X86VBroadcast
4333 (_Brdct.ScalarLdFrag addr:$src2)))))),
4334 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4335 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Integer add/sub (all element widths), saturating add/sub and multiplies.
// b/w forms come from HasBWI-gated multiclasses; d/q from HasAVX512;
// vpmullq additionally requires DQI.
4338 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4339 SSE_INTALU_ITINS_P, 1>;
4340 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4341 SSE_INTALU_ITINS_P, 0>;
4342 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4343 SSE_INTALU_ITINS_P, HasBWI, 1>;
4344 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4345 SSE_INTALU_ITINS_P, HasBWI, 0>;
4346 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4347 SSE_INTALU_ITINS_P, HasBWI, 1>;
4348 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4349 SSE_INTALU_ITINS_P, HasBWI, 0>;
4350 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4351 SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD;
4352 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4353 SSE_INTMUL_ITINS_P, HasBWI, 1>;
4354 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4355 SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD;
4356 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P,
4358 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
4360 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
4362 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4363 SSE_INTALU_ITINS_P, HasBWI, 1>;
// VL instantiation of avx512_binop_rm2 (distinct src/dst types). All forms
// use i64 broadcast info (v8i64/v4i64x/v2i64x) and 64-bit CD8 tuple + VEX_W.
4365 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
4366 AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4367 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4368 let Predicates = [prd] in
4369 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4370 _SrcVTInfo.info512, _DstVTInfo.info512,
4371 v8i64_info, IsCommutable>,
4372 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4373 let Predicates = [HasVLX, prd] in {
4374 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4375 _SrcVTInfo.info256, _DstVTInfo.info256,
4376 v4i64x_info, IsCommutable>,
4377 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4378 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4379 _SrcVTInfo.info128, _DstVTInfo.info128,
4380 v2i64x_info, IsCommutable>,
4381 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// Widening multiplies (i32 src -> i64 dst) and VBMI multishift (i8 -> i8).
4385 defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTMUL_ITINS_P,
4386 avx512vl_i32_info, avx512vl_i64_info,
4387 X86pmuldq, HasAVX512, 1>,T8PD;
4388 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
4389 avx512vl_i32_info, avx512vl_i64_info,
4390 X86pmuludq, HasAVX512, 1>;
4391 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4392 avx512vl_i8_info, avx512vl_i8_info,
4393 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast (rmb) form for pack-style ops with distinct src/dst types:
// broadcast a _Src scalar, then pack down to the _Dst type.
4395 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4396 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4398 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4399 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4401 "${src2}"##_Src.BroadcastStr##", $src1",
4402 "$src1, ${src2}"##_Src.BroadcastStr,
4403 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4404 (_Src.VT (X86VBroadcast
4405 (_Src.ScalarLdFrag addr:$src2)))))),
4406 itins.rm>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4407 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// rr + rm maskable forms for pack-style ops: _Src inputs, _Dst result.
4410 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4411 SDNode OpNode,X86VectorVTInfo _Src,
4412 X86VectorVTInfo _Dst, OpndItins itins,
4413 bit IsCommutable = 0> {
4414 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4415 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4416 "$src2, $src1","$src1, $src2",
4418 (_Src.VT _Src.RC:$src1),
4419 (_Src.VT _Src.RC:$src2))),
4420 itins.rr, IsCommutable>,
4421 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>;
4422 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4423 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4424 "$src2, $src1", "$src1, $src2",
4425 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4426 (bitconvert (_Src.LdFrag addr:$src2)))), itins.rm>,
4427 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4428 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// i32->i16 pack instantiation at all vector lengths; the 32-bit source
// element supports embedded broadcast, hence the extra rmb multiclass.
4431 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4433 let Predicates = [HasBWI] in
4434 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4435 v32i16_info, SSE_PACK>,
4436 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4437 v32i16_info, SSE_PACK>, EVEX_V512;
4438 let Predicates = [HasBWI, HasVLX] in {
4439 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4440 v16i16x_info, SSE_PACK>,
4441 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4442 v16i16x_info, SSE_PACK>, EVEX_V256;
4443 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4444 v8i16x_info, SSE_PACK>,
4445 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4446 v8i16x_info, SSE_PACK>, EVEX_V128;
// i16->i8 pack instantiation; no broadcast form (byte/word elements have no
// embedded broadcast), rr/rm only.
4449 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4451 let Predicates = [HasBWI] in
4452 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
4453 v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG;
4454 let Predicates = [HasBWI, HasVLX] in {
4455 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4456 v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG;
4457 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4458 v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG;
// Multiply-add instantiation (vpmaddubsw / vpmaddwd) over all lengths,
// built on the packs rr/rm multiclass since src/dst types differ.
4462 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4463 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4464 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4465 let Predicates = [HasBWI] in
4466 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4467 _Dst.info512, SSE_PMADD, IsCommutable>, EVEX_V512;
4468 let Predicates = [HasBWI, HasVLX] in {
4469 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4470 _Dst.info256, SSE_PMADD, IsCommutable>, EVEX_V256;
4471 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4472 _Dst.info128, SSE_PMADD, IsCommutable>, EVEX_V128;
// Pack and multiply-add instantiations. Note VPACKUSDW uses AVX5128IBase
// (0F38 map) while the others use AVX512BIBase, matching their encodings.
4476 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4477 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4478 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4479 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4481 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4482 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4483 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4484 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Signed/unsigned min/max for all element widths: b/w forms gated on HasBWI,
// d/q forms on HasAVX512 (with broadcast support via the _dq wrapper).
4486 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4487 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4488 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4489 SSE_INTALU_ITINS_P, HasBWI, 1>;
4490 defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
4491 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4493 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4494 SSE_INTALU_ITINS_P, HasBWI, 1>;
4495 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4496 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4497 defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
4498 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4500 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4501 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4502 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4503 SSE_INTALU_ITINS_P, HasBWI, 1>;
4504 defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
4505 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4507 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4508 SSE_INTALU_ITINS_P, HasBWI, 1>;
4509 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4510 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4511 defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
4512 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
// NOTE(review): the two `let Predicates = [HasDQI, NoVLX]` blocks below are
// textually identical as far as this excerpt shows (the instruction lines in
// the output DAGs appear elided). This looks like an accidental duplication —
// confirm against upstream and remove one copy if so.
4514 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4515 let Predicates = [HasDQI, NoVLX] in {
// Widen the 256-bit operands to 512 bits, do the 512-bit multiply, then
// extract the low 256 bits again.
4516 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4519 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4520 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
// Same trick for the 128-bit form.
4523 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4526 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4527 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4531 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
4532 let Predicates = [HasDQI, NoVLX] in {
4533 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4536 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4537 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4540 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4543 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4544 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// When VLX is unavailable, implement 256/128-bit v*i64 min/max by widening
// to 512 bits, running the ZMM instruction, and extracting the low part.
4548 multiclass avx512_min_max_lowering<Instruction Instr, SDNode OpNode> {
4549 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4552 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4553 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4556 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4559 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4560 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Apply the widening lowering to the four qword min/max instructions.
4564 let Predicates = [HasAVX512, NoVLX] in {
4565 defm : avx512_min_max_lowering<VPMAXUQZrr, umax>;
4566 defm : avx512_min_max_lowering<VPMINUQZrr, umin>;
4567 defm : avx512_min_max_lowering<VPMAXSQZrr, smax>;
4568 defm : avx512_min_max_lowering<VPMINSQZrr, smin>;
4571 //===----------------------------------------------------------------------===//
4572 // AVX-512 Logical Instructions
4573 //===----------------------------------------------------------------------===//
4575 // OpNodeMsk is the OpNode to use when element size is important. OpNode will
4576 // be set to null_frag for 32-bit elements.
// Bitwise logic rr/rm forms. The unmasked pattern uses OpNode (null_frag for
// 32-bit elements, see the _dq wrapper below); the masked pattern always uses
// OpNodeMsk on the i64 vector type, bitconverted back to _.VT.
4577 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr,
4578 SDPatternOperator OpNode,
4579 SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4580 bit IsCommutable = 0> {
4581 let hasSideEffects = 0 in
4582 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
4583 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4584 "$src2, $src1", "$src1, $src2",
4585 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4586 (bitconvert (_.VT _.RC:$src2)))),
4587 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4589 itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V,
4590 Sched<[itins.Sched]>;
4592 let hasSideEffects = 0, mayLoad = 1 in
4593 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4594 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4595 "$src2, $src1", "$src1, $src2",
4596 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4597 (bitconvert (_.LdFrag addr:$src2)))),
4598 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4599 (bitconvert (_.LdFrag addr:$src2)))))),
4600 itins.rm>, AVX512BIBase, EVEX_4V,
4601 Sched<[itins.Sched.Folded, ReadAfterLd]>;
4604 // OpNodeMsk is the OpNode to use where element size is important. So use
4605 // for all of the broadcast patterns.
// Adds the embedded-broadcast (rmb) form on top of avx512_logic_rm; both
// the unmasked and masked broadcast patterns use OpNodeMsk.
4606 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr,
4607 SDPatternOperator OpNode,
4608 SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _,
4609 bit IsCommutable = 0> :
4610 avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _,
4612 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4613 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4614 "${src2}"##_.BroadcastStr##", $src1",
4615 "$src1, ${src2}"##_.BroadcastStr,
4616 (_.i64VT (OpNodeMsk _.RC:$src1,
4618 (_.VT (X86VBroadcast
4619 (_.ScalarLdFrag addr:$src2)))))),
4620 (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1,
4622 (_.VT (X86VBroadcast
4623 (_.ScalarLdFrag addr:$src2)))))))),
4624 itins.rm>, AVX512BIBase, EVEX_4V, EVEX_B,
4625 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// VL instantiation of the logic ops: 512-bit under HasAVX512, 256/128-bit
// additionally under HasVLX.
4628 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr,
4629 SDPatternOperator OpNode,
4630 SDNode OpNodeMsk, OpndItins itins,
4631 AVX512VLVectorVTInfo VTInfo,
4632 bit IsCommutable = 0> {
4633 let Predicates = [HasAVX512] in
4634 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4635 VTInfo.info512, IsCommutable>, EVEX_V512;
4637 let Predicates = [HasAVX512, HasVLX] in {
4638 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4639 VTInfo.info256, IsCommutable>, EVEX_V256;
4640 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins,
4641 VTInfo.info128, IsCommutable>, EVEX_V128;
// Builds the D (dword) and Q (qword) variants of a bitwise logic op. The D
// variant passes null_frag as the generic OpNode so only the Q patterns match
// the element-size-agnostic node; OpNodeMsk still carries the real node for
// the masked/broadcast forms.
4645 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4646 SDNode OpNode, OpndItins itins,
4647 bit IsCommutable = 0> {
4648 defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins,
4649 avx512vl_i64_info, IsCommutable>,
4650 VEX_W, EVEX_CD8<64, CD8VF>;
4651 defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins,
4652 avx512vl_i32_info, IsCommutable>,
4653 EVEX_CD8<32, CD8VF>;
// AND/OR/XOR are commutable (trailing 1); ANDN is not.
4656 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>;
4657 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>;
4658 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>;
4659 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>;
4661 //===----------------------------------------------------------------------===//
4662 // AVX-512 FP arithmetic
4663 //===----------------------------------------------------------------------===//
// Scalar FP binop: intrinsic forms (rr_Int/rm_Int, whole-XMM VecNode with
// FROUND_CURRENT) plus isCodeGenOnly FRC-register forms (rr/rm) matching the
// plain scalar OpNode.
4664 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4665 SDNode OpNode, SDNode VecNode, OpndItins itins,
4667 let ExeDomain = _.ExeDomain in {
4668 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4669 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4670 "$src2, $src1", "$src1, $src2",
4671 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
4672 (i32 FROUND_CURRENT))),
4673 itins.rr>, Sched<[itins.Sched]>;
4675 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4676 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4677 "$src2, $src1", "$src1, $src2",
4678 (_.VT (VecNode _.RC:$src1,
4679 _.ScalarIntMemCPat:$src2,
4680 (i32 FROUND_CURRENT))),
4681 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Plain scalar-register forms: not emitted to intrinsics, used for f32/f64
// codegen from generic IR.
4682 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4683 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4684 (ins _.FRC:$src1, _.FRC:$src2),
4685 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4686 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4687 itins.rr>, Sched<[itins.Sched]> {
4688 let isCommutable = IsCommutable;
4690 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4691 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4692 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4693 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4694 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4695 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Scalar FP binop with an explicit static-rounding operand ($rc), encoded via
// EVEX.B + EVEX_RC; the immediate rounding mode is passed to VecNode.
4700 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4701 SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
4702 let ExeDomain = _.ExeDomain in
4703 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4704 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
4705 "$rc, $src2, $src1", "$src1, $src2, $rc",
4706 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4707 (i32 imm:$rc)), itins.rr, IsCommutable>,
4708 EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Scalar FP binop for ops supporting {sae} (suppress-all-exceptions) rather
// than rounding control: normal rr_Int/rm_Int via VecNode, isCodeGenOnly FRC
// forms via OpNode, and an EVEX.B rrb_Int form via SaeNode + FROUND_NO_EXC.
4710 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4711 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
4712 OpndItins itins, bit IsCommutable> {
4713 let ExeDomain = _.ExeDomain in {
4714 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4715 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4716 "$src2, $src1", "$src1, $src2",
4717 (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
4718 itins.rr>, Sched<[itins.Sched]>;
4720 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4721 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4722 "$src2, $src1", "$src1, $src2",
4723 (_.VT (VecNode _.RC:$src1,
4724 _.ScalarIntMemCPat:$src2)),
4725 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4727 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4728 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4729 (ins _.FRC:$src1, _.FRC:$src2),
4730 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4731 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4732 itins.rr>, Sched<[itins.Sched]> {
4733 let isCommutable = IsCommutable;
4735 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4736 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4737 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4738 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4739 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4740 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// {sae} form: EVEX.B without rounding operand; pattern passes FROUND_NO_EXC.
4743 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4744 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4745 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4746 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4747 (i32 FROUND_NO_EXC)), itins.rr>, EVEX_B,
4748 Sched<[itins.Sched]>;
// Combines the plain scalar forms with the rounding-control form for both SS
// (f32, XS prefix) and SD (f64, XD + VEX.W) variants.
4752 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4754 SizeItins itins, bit IsCommutable> {
4755 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
4756 itins.s, IsCommutable>,
4757 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
4758 itins.s, IsCommutable>,
4759 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4760 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
4761 itins.d, IsCommutable>,
4762 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
4763 itins.d, IsCommutable>,
4764 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Same pairing for {sae}-capable ops (min/max).
4767 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
4768 SDNode VecNode, SDNode SaeNode,
4769 SizeItins itins, bit IsCommutable> {
4770 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
4771 VecNode, SaeNode, itins.s, IsCommutable>,
4772 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4773 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
4774 VecNode, SaeNode, itins.d, IsCommutable>,
4775 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// add/mul are commutable; sub/div/min/max are not (FP min/max ordering).
4777 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
4778 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
4779 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
4780 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
4781 defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
4782 SSE_ALU_ITINS_S, 0>;
4783 defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
4784 SSE_ALU_ITINS_S, 0>;
4786 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
4787 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// isCodeGenOnly FRC-register scalar forms only (no intrinsic/masked forms);
// rr is marked commutable since the *c nodes are order-insensitive.
4788 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
4789 X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
4790 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
4791 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4792 (ins _.FRC:$src1, _.FRC:$src2),
4793 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4794 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4795 itins.rr>, Sched<[itins.Sched]> {
4796 let isCommutable = 1;
4798 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4799 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4800 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4801 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4802 (_.ScalarLdFrag addr:$src2)))], itins.rm>,
4803 Sched<[itins.Sched.Folded, ReadAfterLd]>;
4806 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
4807 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4808 EVEX_CD8<32, CD8VT1>;
4810 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
4811 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4812 EVEX_CD8<64, CD8VT1>;
4814 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
4815 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4816 EVEX_CD8<32, CD8VT1>;
4818 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
4819 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4820 EVEX_CD8<64, CD8VT1>;
// Packed FP binop: rr, rm (full-vector load), and rmb (EVEX.B broadcast of a
// scalar memory operand) forms, all EVEX_4V maskable.
4822 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4823 X86VectorVTInfo _, OpndItins itins,
4825 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
4826 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4827 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4828 "$src2, $src1", "$src1, $src2",
4829 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
4830 IsCommutable>, EVEX_4V, Sched<[itins.Sched]>;
4831 let mayLoad = 1 in {
4832 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4833 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
4834 "$src2, $src1", "$src1, $src2",
4835 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
4836 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
4837 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4838 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
4839 "${src2}"##_.BroadcastStr##", $src1",
4840 "$src1, ${src2}"##_.BroadcastStr,
4841 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
4842 (_.ScalarLdFrag addr:$src2)))),
4843 itins.rm>, EVEX_4V, EVEX_B,
4844 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Packed FP binop with static rounding ($rc immediate, EVEX.B + EVEX_RC).
4849 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4850 OpndItins itins, X86VectorVTInfo _> {
4851 let ExeDomain = _.ExeDomain in
4852 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4853 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
4854 "$rc, $src2, $src1", "$src1, $src2, $rc",
4855 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc))), itins.rr>,
4856 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Packed FP binop with {sae} (suppress exceptions); passes FROUND_NO_EXC.
4859 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4860 OpndItins itins, X86VectorVTInfo _> {
4861 let ExeDomain = _.ExeDomain in
4862 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4863 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4864 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4865 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC))), itins.rr>,
4866 EVEX_4V, EVEX_B, Sched<[itins.Sched]>;
// Packed FP binop at all vector lengths: PS/PD 512-bit under `prd`, plus the
// four 128/256-bit variants when VLX is also available.
4869 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4870 Predicate prd, SizeItins itins,
4871 bit IsCommutable = 0> {
4872 let Predicates = [prd] in {
4873 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
4874 itins.s, IsCommutable>, EVEX_V512, PS,
4875 EVEX_CD8<32, CD8VF>;
4876 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
4877 itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
4878 EVEX_CD8<64, CD8VF>;
4881 // Define only if AVX512VL feature is present.
4882 let Predicates = [prd, HasVLX] in {
4883 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
4884 itins.s, IsCommutable>, EVEX_V128, PS,
4885 EVEX_CD8<32, CD8VF>;
4886 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
4887 itins.s, IsCommutable>, EVEX_V256, PS,
4888 EVEX_CD8<32, CD8VF>;
4889 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
4890 itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
4891 EVEX_CD8<64, CD8VF>;
4892 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
4893 itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
4894 EVEX_CD8<64, CD8VF>;
// Rounding-control and {sae} packed forms exist only at 512 bits.
4898 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4900 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4901 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4902 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4903 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
4906 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
4908 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>,
4909 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
4910 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>,
4911 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP op instantiations. add/mul get rounding forms; min/max get {sae}
// forms; the commutable *C variants are isCodeGenOnly; the bitwise VAND/VOR/
// VANDN/VXOR forms use null_frag (selection handled by the lowering patterns
// below) and require DQI.
4914 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
4915 SSE_ALU_ITINS_P, 1>,
4916 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>;
4917 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
4918 SSE_MUL_ITINS_P, 1>,
4919 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>;
4920 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
4921 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>;
4922 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
4923 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>;
4924 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
4925 SSE_ALU_ITINS_P, 0>,
4926 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>;
4927 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
4928 SSE_ALU_ITINS_P, 0>,
4929 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>;
4930 let isCodeGenOnly = 1 in {
4931 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
4932 SSE_ALU_ITINS_P, 1>;
4933 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
4934 SSE_ALU_ITINS_P, 1>;
4936 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
4937 SSE_ALU_ITINS_P, 1>;
4938 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
4939 SSE_ALU_ITINS_P, 0>;
4940 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
4941 SSE_ALU_ITINS_P, 1>;
4942 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
4943 SSE_ALU_ITINS_P, 1>;
4945 // Patterns catch floating point selects with bitcasted integer logic ops.
// Maps masked/broadcast FP-typed selects over integer logic (and/or/xor/andn
// seen through bitconverts) onto the corresponding VP* instruction variants
// (rrk/rrkz/rmk/rmkz/rmb/rmbk/rmbkz) so masking folds into the logic op.
4946 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
4947 X86VectorVTInfo _, Predicate prd> {
4948 let Predicates = [prd] in {
4949 // Masked register-register logical operations.
4950 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4951 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
4953 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
4954 _.RC:$src1, _.RC:$src2)>;
4955 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4956 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
4958 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
4960 // Masked register-memory logical operations.
4961 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4962 (bitconvert (_.i64VT (OpNode _.RC:$src1,
4963 (load addr:$src2)))),
4965 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
4966 _.RC:$src1, addr:$src2)>;
4967 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4968 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
4970 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
4972 // Register-broadcast logical operations.
4973 def : Pat<(_.i64VT (OpNode _.RC:$src1,
4974 (bitconvert (_.VT (X86VBroadcast
4975 (_.ScalarLdFrag addr:$src2)))))),
4976 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
4977 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4979 (_.i64VT (OpNode _.RC:$src1,
4982 (_.ScalarLdFrag addr:$src2))))))),
4984 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
4985 _.RC:$src1, addr:$src2)>;
4986 def : Pat<(_.VT (vselect _.KRCWM:$mask,
4988 (_.i64VT (OpNode _.RC:$src1,
4991 (_.ScalarLdFrag addr:$src2))))))),
4993 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
4994 _.RC:$src1, addr:$src2)>;
// Instantiates the FP-select lowering patterns for every FP type/width,
// pointing at the D/Q integer-logic instruction families defined above.
4998 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
4999 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5000 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5001 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5002 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5003 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5004 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
5007 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5008 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5009 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5010 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
// Lower scalar f32/f64 logic nodes by copying the scalars into XMM registers,
// running the 128-bit packed DQI logic instruction, and copying back.
5012 let Predicates = [HasVLX,HasDQI] in {
5013 // Use packed logical operations for scalar ops.
5014 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5015 (COPY_TO_REGCLASS (VANDPDZ128rr
5016 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5017 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5018 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5019 (COPY_TO_REGCLASS (VORPDZ128rr
5020 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5021 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5022 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5023 (COPY_TO_REGCLASS (VXORPDZ128rr
5024 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5025 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5026 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5027 (COPY_TO_REGCLASS (VANDNPDZ128rr
5028 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5029 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5031 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5032 (COPY_TO_REGCLASS (VANDPSZ128rr
5033 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5034 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5035 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5036 (COPY_TO_REGCLASS (VORPSZ128rr
5037 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5038 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5039 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5040 (COPY_TO_REGCLASS (VXORPSZ128rr
5041 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5042 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5043 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5044 (COPY_TO_REGCLASS (VANDNPSZ128rr
5045 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5046 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
// Packed scalef-style op: rr/rm/rmb forms, each passing FROUND_CURRENT as the
// rounding operand to the 3-operand OpNode.
5049 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5050 OpndItins itins, X86VectorVTInfo _> {
5051 let ExeDomain = _.ExeDomain in {
5052 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5053 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5054 "$src2, $src1", "$src1, $src2",
5055 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))),
5056 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
5057 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5058 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5059 "$src2, $src1", "$src1, $src2",
5060 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT)),
5061 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
5062 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5063 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5064 "${src2}"##_.BroadcastStr##", $src1",
5065 "$src1, ${src2}"##_.BroadcastStr,
5066 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5067 (_.ScalarLdFrag addr:$src2))),
5068 (i32 FROUND_CURRENT)), itins.rm>,
5069 EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Scalar scalef-style op: rr and rm intrinsic forms with FROUND_CURRENT.
5073 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5074 OpndItins itins, X86VectorVTInfo _> {
5075 let ExeDomain = _.ExeDomain in {
5076 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5077 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5078 "$src2, $src1", "$src1, $src2",
5079 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT))), itins.rr>,
5080 Sched<[itins.Sched]>;
5081 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5082 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix,
5083 "$src2, $src1", "$src1, $src2",
5084 (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2,
5085 (i32 FROUND_CURRENT)), itins.rm>,
5086 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Full scalef family: 512-bit packed PS/PD (with rounding forms), scalar
// SS/SD (with rounding forms), and VLX-gated 128/256-bit packed variants.
5090 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
5091 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5092 avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>,
5093 EVEX_V512, EVEX_CD8<32, CD8VF>;
5094 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5095 avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>,
5096 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5097 defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F32S, f32x_info>,
5098 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
5099 EVEX_4V,EVEX_CD8<32, CD8VT1>;
5100 defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F64S, f64x_info>,
5101 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
5102 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5104 // Define only if AVX512VL feature is present.
5105 let Predicates = [HasVLX] in {
5106 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v4f32x_info>,
5107 EVEX_V128, EVEX_CD8<32, CD8VF>;
5108 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v8f32x_info>,
5109 EVEX_V256, EVEX_CD8<32, CD8VF>;
5110 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v2f64x_info>,
5111 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5112 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v4f64x_info>,
5113 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
5116 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
5118 //===----------------------------------------------------------------------===//
5119 // AVX-512 VPTESTM instructions
5120 //===----------------------------------------------------------------------===//
// VPTEST-style op producing a mask register (KRC destination): rr and
// full-vector rm forms; rr is commutable.
5122 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
5123 OpndItins itins, X86VectorVTInfo _> {
5124 let ExeDomain = _.ExeDomain in {
5125 let isCommutable = 1 in
5126 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5127 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5128 "$src2, $src1", "$src1, $src2",
5129 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
5130 EVEX_4V, Sched<[itins.Sched]>;
5131 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5132 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5133 "$src2, $src1", "$src1, $src2",
5134 (OpNode (_.VT _.RC:$src1),
5135 (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
5136 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5137 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-memory (EVEX.B) form of the VPTEST-style op.
5141 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5142 OpndItins itins, X86VectorVTInfo _> {
5143 let ExeDomain = _.ExeDomain in
5144 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5145 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5146 "${src2}"##_.BroadcastStr##", $src1",
5147 "$src1, ${src2}"##_.BroadcastStr,
5148 (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
5149 (_.ScalarLdFrag addr:$src2)))),
5150 itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5151 Sched<[itins.Sched.Folded, ReadAfterLd]>;
5154 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widens the narrow operands into a 512-bit register via INSERT_SUBREG,
// runs the Z-suffixed instruction, and copies the result back to the
// narrow mask class.
5155 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
5156 X86VectorVTInfo _, string Suffix> {
5157 def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
5158 (_.KVT (COPY_TO_REGCLASS
5159 (!cast<Instruction>(NAME # Suffix # "Zrr")
5160 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5161 _.RC:$src1, _.SubRegIdx),
5162 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5163 _.RC:$src2, _.SubRegIdx)),
// D/Q VPTEST at all widths; NoVLX targets get the widen-to-512 lowering
// patterns instead of the native 128/256-bit instructions.
5167 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5168 OpndItins itins, AVX512VLVectorVTInfo _,
5170 let Predicates = [HasAVX512] in
5171 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
5172 avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5174 let Predicates = [HasAVX512, HasVLX] in {
5175 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
5176 avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
5177 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
5178 avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
5180 let Predicates = [HasAVX512, NoVLX] in {
5181 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
5182 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
// Instantiates the dword ("d") and qword ("q", VEX.W) VPTEST families.
5186 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
5188 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
5189 avx512vl_i32_info, "D">;
5190 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
5191 avx512vl_i64_info, "Q">, VEX_W;
// Byte/word VPTEST variants: gated on BWI (plus VLX for narrow widths); on
// NoVLX the widen-to-512 lowering patterns cover 128/256-bit inputs.
5194 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5195 SDNode OpNode, OpndItins itins> {
5196 let Predicates = [HasBWI] in {
5197 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
5199 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
5202 let Predicates = [HasVLX, HasBWI] in {
5204 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
5206 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
5208 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
5210 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
5214 let Predicates = [HasAVX512, NoVLX] in {
5215 defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
5216 defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
5217 defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
5218 defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
// All element sizes: byte/word use opc_wb, dword/qword use opc_dq.
// VPTESTM (T8PD) and VPTESTNM (T8XS) share opcodes, distinguished by prefix.
5222 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5223 SDNode OpNode, OpndItins itins> :
5224 avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
5225 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
5227 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
5228 SSE_BIT_ITINS_P>, T8PD;
5229 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
5230 SSE_BIT_ITINS_P>, T8XS;
5233 //===----------------------------------------------------------------------===//
5234 // AVX-512 Shift instructions
5235 //===----------------------------------------------------------------------===//
// Shift by 8-bit immediate: register (ri) and memory (mi) forms.
5236 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5237 string OpcodeStr, SDNode OpNode, OpndItins itins,
5238 X86VectorVTInfo _> {
5239 let ExeDomain = _.ExeDomain in {
5240 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5241 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5242 "$src2, $src1", "$src1, $src2",
5243 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
5244 itins.rr>, Sched<[itins.Sched]>;
5245 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5246 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5247 "$src2, $src1", "$src1, $src2",
5248 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5250 itins.rm>, Sched<[itins.Sched.Folded]>;
// Shift-by-immediate with broadcast memory operand (EVEX.B, "mbi" form).
5254 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5255 string OpcodeStr, SDNode OpNode, OpndItins itins,
5256 X86VectorVTInfo _> {
5257 let ExeDomain = _.ExeDomain in
5258 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5259 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5260 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5261 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
5262 itins.rm>, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Shift by a count held in an XMM register (or loaded as 128 bits).
5265 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5266 OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5267 X86VectorVTInfo _> {
5268 // src2 is always 128-bit
5269 let ExeDomain = _.ExeDomain in {
5270 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5271 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5272 "$src2, $src1", "$src1, $src2",
5273 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
5274 itins.rr>, AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>;
5275 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5276 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5277 "$src2, $src1", "$src1, $src2",
5278 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
5279 itins.rm>, AVX512BIBase,
5280 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// XMM-count shifts at 512/256/128 bits; note the CD8 tuple differs per width
// (CD8VQ / CD8VH / CD8VF) because the memory operand is always 128-bit.
5284 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5285 OpndItins itins, ValueType SrcVT, PatFrag bc_frag,
5286 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
5287 let Predicates = [prd] in
5288 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5289 VTInfo.info512>, EVEX_V512,
5290 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5291 let Predicates = [prd, HasVLX] in {
5292 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5293 VTInfo.info256>, EVEX_V256,
5294 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5295 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag,
5296 VTInfo.info128>, EVEX_V128,
5297 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// D/Q/W XMM-count shifts; the W form requires BWI, and its count is still
// bitcast through v2i64 (bc_v2i64).
5301 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5302 string OpcodeStr, SDNode OpNode,
5304 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32,
5305 bc_v4i32, avx512vl_i32_info, HasAVX512>;
5306 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64,
5307 bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W;
5308 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16,
5309 bc_v2i64, avx512vl_i16_info, HasBWI>;
// Immediate shifts (plain + broadcast) at 512/256/128 bits.
5312 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5313 string OpcodeStr, SDNode OpNode,
5314 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5315 let Predicates = [HasAVX512] in
5316 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5318 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5319 VTInfo.info512>, EVEX_V512;
5320 let Predicates = [HasAVX512, HasVLX] in {
5321 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins,
5323 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5324 VTInfo.info256>, EVEX_V256;
5325 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5326 itins, VTInfo.info128>,
5327 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins,
5328 VTInfo.info128>, EVEX_V128;
// Word-element immediate shifts (BWI-gated; no broadcast form — word
// broadcast is not supported); VEX_WIG: W bit ignored.
5332 multiclass avx512_shift_rmi_w<bits<8> opcw,
5333 Format ImmFormR, Format ImmFormM,
5334 string OpcodeStr, SDNode OpNode,
5336 let Predicates = [HasBWI] in
5337 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5338 itins, v32i16_info>, EVEX_V512, VEX_WIG;
5339 let Predicates = [HasVLX, HasBWI] in {
5340 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5341 itins, v16i16x_info>, EVEX_V256, VEX_WIG;
5342 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5343 itins, v8i16x_info>, EVEX_V128, VEX_WIG;
// D and Q (VEX.W) immediate-shift families with their CD8 scaling.
5347 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5348 Format ImmFormR, Format ImmFormM,
5349 string OpcodeStr, SDNode OpNode, OpndItins itins> {
5350 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5351 itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5352 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5353 itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Shift/rotate by immediate. The MRM?r/MRM?m formats select the /r opcode
// extension field (e.g. /2 for PSRL, /6 for PSLL, /4 for PSRA, /0-/1 for ROR/ROL).
// NOTE(review): some continuation lines are missing from this extraction.
5356 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5358 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5359 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5361 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5363 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5364 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5366 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5368 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5369 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5371 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5372 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
5373 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5374 SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V;
// Shift by a count held in an XMM register (whole-vector count forms).
5376 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>;
5377 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>;
5378 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>;
// Without VLX there is no 128/256-bit VPSRAQ; widen the operand into a ZMM
// register via INSERT_SUBREG, use the 512-bit instruction, and extract the
// low subregister of the result.
// NOTE(review): the lines naming the 512-bit instruction in each output dag
// are missing from this extraction — confirm against the upstream file.
5380 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
5381 let Predicates = [HasAVX512, NoVLX] in {
5382 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5383 (EXTRACT_SUBREG (v8i64
5385 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5386 VR128X:$src2)), sub_ymm)>;
5388 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5389 (EXTRACT_SUBREG (v8i64
5391 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5392 VR128X:$src2)), sub_xmm)>;
5394 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5395 (EXTRACT_SUBREG (v8i64
5397 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5398 imm:$src2)), sub_ymm)>;
5400 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5401 (EXTRACT_SUBREG (v8i64
5403 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5404 imm:$src2)), sub_xmm)>;
5407 //===-------------------------------------------------------------------===//
5408 // Variable Bit Shifts
5409 //===-------------------------------------------------------------------===//
// Per-element variable shift: register-register (rr) and register-memory (rm)
// forms, both maskable (zeroing/merging via AVX512_maskable).
5410 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5411 OpndItins itins, X86VectorVTInfo _> {
5412 let ExeDomain = _.ExeDomain in {
5413 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5414 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5415 "$src2, $src1", "$src1, $src2",
5416 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
5417 itins.rr>, AVX5128IBase, EVEX_4V,
5418 Sched<[itins.Sched]>;
// Memory operand is loaded full-width and bitconverted to the element type.
5419 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5420 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5421 "$src2, $src1", "$src1, $src2",
5422 (_.VT (OpNode _.RC:$src1,
5423 (_.VT (bitconvert (_.LdFrag addr:$src2))))),
5424 itins.rm>, AVX5128IBase, EVEX_4V,
5425 EVEX_CD8<_.EltSize, CD8VF>,
5426 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast-memory (rmb) form of the variable shift: the scalar at $src2 is
// broadcast to all elements (EVEX.b set via EVEX_B).
5430 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5431 OpndItins itins, X86VectorVTInfo _> {
5432 let ExeDomain = _.ExeDomain in
5433 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5434 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5435 "${src2}"##_.BroadcastStr##", $src1",
5436 "$src1, ${src2}"##_.BroadcastStr,
5437 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5438 (_.ScalarLdFrag addr:$src2))))),
5439 itins.rm>, AVX5128IBase, EVEX_B,
5440 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5441 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates the variable shift (plus broadcast form) at 512/256/128-bit
// widths; sub-512 widths require VLX.
5444 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5445 OpndItins itins, AVX512VLVectorVTInfo _> {
5446 let Predicates = [HasAVX512] in
5447 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5448 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5450 let Predicates = [HasAVX512, HasVLX] in {
5451 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5452 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
5453 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
5454 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
// D (i32) and Q (i64, VEX_W) element-type variants of the variable shift.
5458 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5459 SDNode OpNode, OpndItins itins> {
5460 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins,
5462 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins,
5463 avx512vl_i64_info>, VEX_W;
// Widen 256/128-bit operands into ZMM, run the 512-bit "Zrr" instruction,
// then extract the low subregister (the EXTRACT_SUBREG lines appear to be
// missing from this extraction — confirm upstream).
5466 // Use 512bit version to implement 128/256 bit in case NoVLX.
5467 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5468 SDNode OpNode, list<Predicate> p> {
5469 let Predicates = p in {
5470 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5471 (_.info256.VT _.info256.RC:$src2))),
5473 (!cast<Instruction>(OpcodeStr#"Zrr")
5474 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5475 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5478 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5479 (_.info128.VT _.info128.RC:$src2))),
5481 (!cast<Instruction>(OpcodeStr#"Zrr")
5482 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5483 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element (i16) variable shift: BWI for 512-bit, BWI+VLX for 256/128.
5487 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5488 SDNode OpNode, OpndItins itins> {
5489 let Predicates = [HasBWI] in
5490 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>,
5492 let Predicates = [HasVLX, HasBWI] in {
5494 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>,
5496 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>,
// Per-element variable shifts/rotates, mapped to the generic shl/sra/srl and
// rotl/rotr DAG nodes. The trailing defms select the 512-bit lowering for the
// VT/feature combinations that lack a native sub-512 instruction.
5501 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, SSE_INTSHIFT_P>,
5502 avx512_var_shift_w<0x12, "vpsllvw", shl, SSE_INTSHIFT_P>;
5504 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, SSE_INTSHIFT_P>,
5505 avx512_var_shift_w<0x11, "vpsravw", sra, SSE_INTSHIFT_P>;
5507 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, SSE_INTSHIFT_P>,
5508 avx512_var_shift_w<0x10, "vpsrlvw", srl, SSE_INTSHIFT_P>;
5510 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SSE_INTSHIFT_P>;
5511 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SSE_INTSHIFT_P>;
5513 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5514 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5515 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5516 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5518 // Special handling for the VPSRAV intrinsics.
// Selects the rr/rm and masked (rrk/rmk) / zero-masked (rrkz/rmkz) forms of
// an instruction for the X86vsrav node, including vselect-based merge and
// zeroing masking patterns.
5519 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
5520 list<Predicate> p> {
5521 let Predicates = p in {
5522 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
5523 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
5525 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
5526 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
5527 _.RC:$src1, addr:$src2)>;
// Merge masking: pass-through value $src0 selected where the mask is clear.
5528 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5529 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
5530 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
5531 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
5532 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5533 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5535 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
5536 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
// Zeroing masking: masked-off lanes become zero (ImmAllZerosV).
5537 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5538 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
5539 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
5540 _.RC:$src1, _.RC:$src2)>;
5541 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5542 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5544 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
5545 _.RC:$src1, addr:$src2)>;
// Extends avx512_var_shift_int_lowering with broadcast-memory (rmb/rmbk/rmbkz)
// patterns for X86vsrav with a broadcast scalar second operand.
5549 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
5550 list<Predicate> p> :
5551 avx512_var_shift_int_lowering<InstrStr, _, p> {
5552 let Predicates = p in {
5553 def : Pat<(_.VT (X86vsrav _.RC:$src1,
5554 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
5555 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
5556 _.RC:$src1, addr:$src2)>;
5557 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5558 (X86vsrav _.RC:$src1,
5559 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5561 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
5562 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5563 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5564 (X86vsrav _.RC:$src1,
5565 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5567 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
5568 _.RC:$src1, addr:$src2)>;
// Instantiate X86vsrav lowering for each element type/width. Word forms have
// no broadcast variant (no 16-bit embedded broadcast), hence the non-mb class.
5572 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
5573 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
5574 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
5575 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
5576 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
5577 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
5578 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
5579 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
5580 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
// Without VLX, left rotates at 128/256 bits are widened to the 512-bit
// VPROLV/VPROLI instruction and the low subregister extracted. The lines
// naming the 512-bit instruction in each output dag are missing from this
// extraction — confirm against the upstream file.
5583 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
5584 let Predicates = [HasAVX512, NoVLX] in {
5585 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5586 (EXTRACT_SUBREG (v8i64
5588 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5589 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5591 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5592 (EXTRACT_SUBREG (v8i64
5594 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5595 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5598 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5599 (EXTRACT_SUBREG (v16i32
5601 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5602 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5604 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5605 (EXTRACT_SUBREG (v16i32
5607 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5608 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-count rotate-left (X86vrotli) variants of the same widening trick.
5611 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
5612 (EXTRACT_SUBREG (v8i64
5614 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5615 imm:$src2)), sub_xmm)>;
5616 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
5617 (EXTRACT_SUBREG (v8i64
5619 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5620 imm:$src2)), sub_ymm)>;
5622 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
5623 (EXTRACT_SUBREG (v16i32
5625 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5626 imm:$src2)), sub_xmm)>;
5627 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
5628 (EXTRACT_SUBREG (v16i32
5630 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5631 imm:$src2)), sub_ymm)>;
// Right-rotate counterpart of the VPROL widening patterns above; same
// widen-to-512/extract scheme and the same missing-instruction-line caveat.
5634 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
5635 let Predicates = [HasAVX512, NoVLX] in {
5636 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5637 (EXTRACT_SUBREG (v8i64
5639 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5640 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5642 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5643 (EXTRACT_SUBREG (v8i64
5645 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5646 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
5649 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5650 (EXTRACT_SUBREG (v16i32
5652 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5653 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
5655 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5656 (EXTRACT_SUBREG (v16i32
5658 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5659 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
// Immediate-count rotate-right (X86vrotri) variants.
5662 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
5663 (EXTRACT_SUBREG (v8i64
5665 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5666 imm:$src2)), sub_xmm)>;
5667 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
5668 (EXTRACT_SUBREG (v8i64
5670 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5671 imm:$src2)), sub_ymm)>;
5673 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
5674 (EXTRACT_SUBREG (v16i32
5676 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5677 imm:$src2)), sub_xmm)>;
5678 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
5679 (EXTRACT_SUBREG (v16i32
5681 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5682 imm:$src2)), sub_ymm)>;
5685 //===-------------------------------------------------------------------===//
5686 // 1-src variable permutation VPERMW/D/Q
5687 //===-------------------------------------------------------------------===//
// VPERMD/Q-style 1-source variable permute, reusing the var_shift encoding
// helpers. Only 512- and 256-bit widths (no 128-bit lane-crossing permute).
5688 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5689 OpndItins itins, AVX512VLVectorVTInfo _> {
5690 let Predicates = [HasAVX512] in
5691 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5692 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
5694 let Predicates = [HasAVX512, HasVLX] in
5695 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5696 avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
// Immediate-control permute (VPERMQ/VPERMPD imm forms), reusing the
// immediate-shift encoding helpers; 512- and 256-bit widths only.
5699 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5700 string OpcodeStr, SDNode OpNode,
5701 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
5702 let Predicates = [HasAVX512] in
5703 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5704 itins, VTInfo.info512>,
5705 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5706 itins, VTInfo.info512>, EVEX_V512;
5707 let Predicates = [HasAVX512, HasVLX] in
5708 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5709 itins, VTInfo.info256>,
5710 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5711 itins, VTInfo.info256>, EVEX_V256;
// Byte/word variable permute (VPERMB/VPERMW); the gating predicate (BWI or
// VBMI) is passed in since the two differ.
5714 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5715 Predicate prd, SDNode OpNode,
5716 OpndItins itins, AVX512VLVectorVTInfo _> {
5717 let Predicates = [prd] in
5718 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>,
5720 let Predicates = [HasVLX, prd] in {
5721 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>,
5723 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>,
// 1-source permute instantiations: variable-control (X86VPermv) and
// immediate-control (X86VPermi) forms for b/w/d/q and ps/pd element types.
5728 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5729 AVX2_PERMV_I, avx512vl_i16_info>, VEX_W;
5730 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
5731 AVX2_PERMV_I, avx512vl_i8_info>;
5733 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5734 AVX2_PERMV_I, avx512vl_i32_info>;
5735 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5736 AVX2_PERMV_I, avx512vl_i64_info>, VEX_W;
5737 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5738 AVX2_PERMV_F, avx512vl_f32_info>;
5739 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5740 AVX2_PERMV_F, avx512vl_f64_info>, VEX_W;
5742 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5743 X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>,
5744 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5745 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5746 X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>,
5747 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5748 //===----------------------------------------------------------------------===//
5749 // AVX-512 - VPERMIL
5750 //===----------------------------------------------------------------------===//
// VPERMILPS/PD with a vector control operand (Ctrl is the integer VT matching
// the data VT): rr, rm, and broadcast rmb forms, all maskable.
5752 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5753 OpndItins itins, X86VectorVTInfo _,
5754 X86VectorVTInfo Ctrl> {
5755 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5756 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5757 "$src2, $src1", "$src1, $src2",
5758 (_.VT (OpNode _.RC:$src1,
5759 (Ctrl.VT Ctrl.RC:$src2))), itins.rr>,
5760 T8PD, EVEX_4V, Sched<[itins.Sched]>;
5761 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5762 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5763 "$src2, $src1", "$src1, $src2",
5766 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2))))),
5767 itins.rm>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5768 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Broadcast form: control element loaded once and splatted (EVEX.b).
5769 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5770 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5771 "${src2}"##_.BroadcastStr##", $src1",
5772 "$src1, ${src2}"##_.BroadcastStr,
5775 (Ctrl.VT (X86VBroadcast
5776 (Ctrl.ScalarLdFrag addr:$src2))))),
5777 itins.rm>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
5778 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates the variable-control VPERMIL at all three widths; sub-512
// widths require VLX.
5781 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
5782 OpndItins itins, AVX512VLVectorVTInfo _,
5783 AVX512VLVectorVTInfo Ctrl> {
5784 let Predicates = [HasAVX512] in {
5785 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5786 _.info512, Ctrl.info512>, EVEX_V512;
5788 let Predicates = [HasAVX512, HasVLX] in {
5789 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5790 _.info128, Ctrl.info128>, EVEX_V128;
5791 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins,
5792 _.info256, Ctrl.info256>, EVEX_V256;
// Combines the variable-control (OpcVar) and immediate-control (OpcImm)
// VPERMIL forms under one NAME.
5796 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
5797 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5798 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>;
5799 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
5800 X86VPermilpi, AVX_VPERMILV, _>,
5801 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS/PD instantiations; ExeDomain pins the FP execution domain.
5804 let ExeDomain = SSEPackedSingle in
5805 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
5807 let ExeDomain = SSEPackedDouble in
5808 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
5809 avx512vl_i64_info>, VEX_W;
5811 //===----------------------------------------------------------------------===//
5812 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5813 //===----------------------------------------------------------------------===//
// VPSHUFD reuses the dword immediate-shift sizes; VPSHUFHW/VPSHUFLW reuse the
// word immediate-shift helper (XS/XD prefixes select high/low-word shuffles).
5815 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
5816 X86PShufd, SSE_PSHUF, avx512vl_i32_info>,
5817 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
5818 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
5819 X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base;
5820 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
5821 X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base;
// VPSHUFB at 512 (BWI) and 256/128 (BWI+VLX) widths, via the var_shift helper.
5823 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5825 let Predicates = [HasBWI] in
5826 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512;
5828 let Predicates = [HasVLX, HasBWI] in {
5829 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256;
5830 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128;
5834 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG;
5836 //===----------------------------------------------------------------------===//
5837 // Move Low to High and High to Low packed FP Instructions
5838 //===----------------------------------------------------------------------===//
// EVEX-encoded MOVLHPS/MOVHLPS: move low/high 64 bits between XMM halves,
// matched to the X86Movlhps/X86Movhlps DAG nodes.
5839 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
5840 (ins VR128X:$src1, VR128X:$src2),
5841 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5842 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
5843 IIC_SSE_MOV_LH>, EVEX_4V;
5844 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
5845 (ins VR128X:$src1, VR128X:$src2),
5846 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5847 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
5848 IIC_SSE_MOV_LH>, EVEX_4V;
5850 //===----------------------------------------------------------------------===//
5851 // VMOVHPS/PD VMOVLPS Instructions
5852 // All patterns were taken from the SSE implementation.
5853 //===----------------------------------------------------------------------===//
// Load forms of VMOVHPS/HPD/VMOVLPS/LPD: merge a 64-bit memory operand into
// the high or low half of an XMM register (OpNode selects which).
5854 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
5855 X86VectorVTInfo _> {
5856 let ExeDomain = _.ExeDomain in
5857 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
5858 (ins _.RC:$src1, f64mem:$src2),
5859 !strconcat(OpcodeStr,
5860 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5864 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
5865 IIC_SSE_MOV_LH>, EVEX_4V;
5868 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
5869 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5870 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
5871 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5872 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
5873 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5874 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
5875 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Select the EVEX VMOVH/L load forms for equivalent DAG shapes (bitcast
// scalar loads, zero-extending loads, movsd-of-scalar-load).
5877 let Predicates = [HasAVX512] in {
5879 def : Pat<(X86Movlhps VR128X:$src1,
5880 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
5881 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5882 def : Pat<(X86Movlhps VR128X:$src1,
5883 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
5884 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5886 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5887 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
5888 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
5890 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
5891 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
5893 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
5894 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
5895 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
5896 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
5897 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms: write the high (via unpckh of the source with itself) or low
// 64 bits of an XMM register to memory.
5900 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
5901 (ins f64mem:$dst, VR128X:$src),
5902 "vmovhps\t{$src, $dst|$dst, $src}",
5903 [(store (f64 (extractelt
5904 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
5905 (bc_v2f64 (v4f32 VR128X:$src))),
5906 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5907 EVEX, EVEX_CD8<32, CD8VT2>;
5908 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
5909 (ins f64mem:$dst, VR128X:$src),
5910 "vmovhpd\t{$src, $dst|$dst, $src}",
5911 [(store (f64 (extractelt
5912 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
5913 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
5914 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
5915 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
5916 (ins f64mem:$dst, VR128X:$src),
5917 "vmovlps\t{$src, $dst|$dst, $src}",
5918 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
5919 (iPTR 0))), addr:$dst)],
5921 EVEX, EVEX_CD8<32, CD8VT2>;
5922 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
5923 (ins f64mem:$dst, VR128X:$src),
5924 "vmovlpd\t{$src, $dst|$dst, $src}",
5925 [(store (f64 (extractelt (v2f64 VR128X:$src),
5926 (iPTR 0))), addr:$dst)],
5928 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Extra store-form selections: vpermilpd-imm-1-then-store-lane-0 is a
// high-half store; movlps/movlpd merged with a load-store pair.
5930 let Predicates = [HasAVX512] in {
5932 def : Pat<(store (f64 (extractelt
5933 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
5934 (iPTR 0))), addr:$dst),
5935 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
5937 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
5939 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
5941 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
5943 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
5945 //===----------------------------------------------------------------------===//
5946 // FMA - Fused Multiply Operations
// FMA 213 form (dst = src2 * src1 + src3): rr, rm, and broadcast rmb variants.
// $src1 is tied to $dst via the Constraints string.
5949 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5950 X86VectorVTInfo _, string Suff> {
5951 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5952 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
5953 (ins _.RC:$src2, _.RC:$src3),
5954 OpcodeStr, "$src3, $src2", "$src2, $src3",
5955 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), NoItinerary, 1, 1>,
5956 AVX512FMA3Base, Sched<[WriteFMA]>;
5958 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959 (ins _.RC:$src2, _.MemOp:$src3),
5960 OpcodeStr, "$src3, $src2", "$src2, $src3",
5961 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
5962 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
5964 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
5965 (ins _.RC:$src2, _.ScalarMemOp:$src3),
5966 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
5967 !strconcat("$src2, ${src3}", _.BroadcastStr ),
5969 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))),
5970 NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
5971 Sched<[WriteFMALd, ReadAfterLd]>;
// 213-form with an explicit static rounding-mode operand (EVEX.b + $rc).
5975 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5976 X86VectorVTInfo _, string Suff> {
5977 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
5978 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
5979 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
5980 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
5981 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))),
5982 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 213 form at 512 (plus rounding variant) and, under VLX, 256/128 bits.
5985 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
5986 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
5988 let Predicates = [HasAVX512] in {
5989 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
5990 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
5991 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
5993 let Predicates = [HasVLX, HasAVX512] in {
5994 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
5995 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
5996 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
5997 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// PS (f32) and PD (f64, VEX_W) element-type variants of the 213 form.
6001 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6002 SDNode OpNodeRnd > {
6003 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6004 avx512vl_f32_info, "PS">;
6005 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6006 avx512vl_f64_info, "PD">, VEX_W;
// All 213-order FMA variants with their normal and rounding DAG nodes.
6009 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6010 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6011 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6012 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6013 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6014 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// FMA 231 form (dst = src2 * src3 + src1): rr, rm, and broadcast rmb variants,
// with $src1 tied to $dst.
6017 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6018 X86VectorVTInfo _, string Suff> {
6019 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6020 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6021 (ins _.RC:$src2, _.RC:$src3),
6022 OpcodeStr, "$src3, $src2", "$src2, $src3",
6023 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), NoItinerary, 1, 1,
6024 vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6026 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6027 (ins _.RC:$src2, _.MemOp:$src3),
6028 OpcodeStr, "$src3, $src2", "$src2, $src3",
6029 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6030 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6032 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6033 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6034 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6035 "$src2, ${src3}"##_.BroadcastStr,
6036 (_.VT (OpNode _.RC:$src2,
6037 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6038 _.RC:$src1)), NoItinerary, 1, 0>, AVX512FMA3Base, EVEX_B,
6039 Sched<[WriteFMALd, ReadAfterLd]>;
// 231-form with an explicit static rounding-mode operand (EVEX.b + $rc).
6043 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6044 X86VectorVTInfo _, string Suff> {
6045 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6046 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6047 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6048 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6049 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))),
6050 NoItinerary, 1, 1, vselect, 1>,
6051 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 231 form at 512 (plus rounding variant) and, under VLX, 256/128 bits.
6054 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6055 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6057 let Predicates = [HasAVX512] in {
6058 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6059 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6060 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6062 let Predicates = [HasVLX, HasAVX512] in {
6063 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6064 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6065 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6066 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// PS (f32) and PD (f64, VEX_W) element-type variants of the 231 form.
6070 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6071 SDNode OpNodeRnd > {
6072 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6073 avx512vl_f32_info, "PS">;
6074 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6075 avx512vl_f64_info, "PD">, VEX_W;
// All 231-order FMA variants with their normal and rounding DAG nodes.
6078 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6079 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6080 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6081 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6082 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6083 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// FMA 132 form: register pattern is (src1, src3, src2); memory patterns are
// deliberately written in 312 order (see the inline comments) so tablegen's
// duplicate-pattern detection does not collide with the 213/231 forms.
6085 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6086 X86VectorVTInfo _, string Suff> {
6087 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
6088 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6089 (ins _.RC:$src2, _.RC:$src3),
6090 OpcodeStr, "$src3, $src2", "$src2, $src3",
6091 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), NoItinerary,
6092 1, 1, vselect, 1>, AVX512FMA3Base, Sched<[WriteFMA]>;
6094 // Pattern is 312 order so that the load is in a different place from the
6095 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6096 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6097 (ins _.RC:$src2, _.MemOp:$src3),
6098 OpcodeStr, "$src3, $src2", "$src2, $src3",
6099 (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6100 NoItinerary, 1, 0>, AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6102 // Pattern is 312 order so that the load is in a different place from the
6103 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6104 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6105 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6106 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6107 "$src2, ${src3}"##_.BroadcastStr,
6108 (_.VT (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6109 _.RC:$src1, _.RC:$src2)), NoItinerary, 1, 0>,
6110 AVX512FMA3Base, EVEX_B, Sched<[WriteFMALd, ReadAfterLd]>;
// Register-only 132-form FMA with explicit rounding control: takes an extra
// AVX512RC:$rc operand and encodes it via EVEX_B + EVEX_RC (static rounding,
// 512-bit register forms only).
6114 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6115 X86VectorVTInfo _, string Suff> {
6116 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0 in
6117 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6118 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6119 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6120 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))),
6121 NoItinerary, 1, 1, vselect, 1>,
6122 AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[WriteFMA]>;
// 132-form packed FMA across vector widths: the 512-bit variant (with the
// rounding-control form) requires HasAVX512; the 128/256-bit variants
// additionally require HasVLX and have no rounding form.
6125 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6126 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6128 let Predicates = [HasAVX512] in {
6129 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6130 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6131 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6133 let Predicates = [HasVLX, HasAVX512] in {
6134 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6135 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6136 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6137 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Packed FMA, 132 operand form: instantiate the common multiclass for f32
// (PS) and f64 (PD); PD adds VEX_W for 64-bit elements.
6141 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6142 SDNode OpNodeRnd > {
6143 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6144 avx512vl_f32_info, "PS">;
6145 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6146 avx512vl_f64_info, "PD">, VEX_W;
// Instantiations of the 132-form packed FMA family with their AVX-512
// opcodes (parallel to the 231-form instantiations above).
6149 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6150 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6151 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6152 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6153 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6154 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA skeleton. The caller supplies the selection-DAG patterns as
// dag parameters: RHS_VEC_r/m/rb for the masked intrinsic (_Int) forms
// (register, memory, and static-rounding), and RHS_r/m for the
// isCodeGenOnly FRC forms. MaskOnlyReg=1 suppresses the pattern on the
// plain register form (it is then matched only via the _Int form).
6157 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6158 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6159 dag RHS_r, dag RHS_m, bit MaskOnlyReg> {
6160 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6161 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6162 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6163 "$src3, $src2", "$src2, $src3", RHS_VEC_r, NoItinerary, 1, 1>,
6164 AVX512FMA3Base, Sched<[WriteFMA]>;
6166 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6167 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6168 "$src3, $src2", "$src2, $src3", RHS_VEC_m, NoItinerary, 1, 1>,
6169 AVX512FMA3Base, Sched<[WriteFMALd, ReadAfterLd]>;
6171 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6172 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6173 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb,
6174 NoItinerary, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC,
// Plain scalar-register (FRC) forms, used only for codegen pattern
// matching, never for assembly parsing (isCodeGenOnly).
6177 let isCodeGenOnly = 1, isCommutable = 1 in {
6178 def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6179 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6180 !strconcat(OpcodeStr,
6181 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6182 !if(MaskOnlyReg, [], [RHS_r])>, Sched<[WriteFMA]>;
6183 def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6184 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6185 !strconcat(OpcodeStr,
6186 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6187 [RHS_m]>, Sched<[WriteFMALd, ReadAfterLd]>;
6188 }// isCodeGenOnly = 1
6189 }// Constraints = "$src1 = $dst"
// Instantiates all three scalar FMA operand orders (213, 231, 132) for one
// element type, supplying the concrete patterns to avx512_fma3s_common.
// OpNodes1/OpNodeRnds1 take operands in intrinsic (123) order to preserve
// the pass-through source; OpNodes3/OpNodeRnds3 are the 231-order variants.
6192 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6193 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6194 SDNode OpNodeRnds1, SDNode OpNodes3,
6195 SDNode OpNodeRnds3, X86VectorVTInfo _,
6197 let ExeDomain = _.ExeDomain in {
6198 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6199 // Operands for intrinsic are in 123 order to preserve passthu
6201 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2, _.RC:$src3)),
6202 (_.VT (OpNodes1 _.RC:$src1, _.RC:$src2,
6203 _.ScalarIntMemCPat:$src3)),
6204 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6206 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6208 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6209 (_.ScalarLdFrag addr:$src3)))), 0>;
6211 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6212 (_.VT (OpNodes3 _.RC:$src2, _.RC:$src3, _.RC:$src1)),
6213 (_.VT (OpNodes3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6215 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6217 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6219 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6220 (_.ScalarLdFrag addr:$src3), _.FRC:$src1))), 1>;
6222 // One pattern is 312 order so that the load is in a different place from the
6223 // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6224 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6226 (_.VT (OpNodes1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6229 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6231 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6232 _.FRC:$src1, _.FRC:$src2))), 1>;
// Top-level scalar FMA multiclass: instantiates avx512_fma3s_all twice,
// once per element width (the first with CD8<32> scaling, the second with
// CD8<64> + VEX_W). Both are VEX_LIG (vector length ignored for scalars).
6236 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6237 string OpcodeStr, SDNode OpNode, SDNode OpNodes1,
6238 SDNode OpNodeRnds1, SDNode OpNodes3,
6239 SDNode OpNodeRnds3> {
6240 let Predicates = [HasAVX512] in {
6241 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6242 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6244 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6245 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6246 OpNodes1, OpNodeRnds1, OpNodes3, OpNodeRnds3,
6248 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar FMA instantiations. Each passes the 213/231/132 opcodes plus the
// plain, 1-order, and 3-order SDNodes (with rounding variants).
6252 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86Fmadds1,
6253 X86FmaddRnds1, X86Fmadds3, X86FmaddRnds3>;
6254 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86Fmsubs1,
6255 X86FmsubRnds1, X86Fmsubs3, X86FmsubRnds3>;
6256 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86Fnmadds1,
6257 X86FnmaddRnds1, X86Fnmadds3, X86FnmaddRnds3>;
6258 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1,
6259 X86FnmsubRnds1, X86Fnmsubs3, X86FnmsubRnds3>;
6261 //===----------------------------------------------------------------------===//
6262 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6263 //===----------------------------------------------------------------------===//
// IFMA (VPMADD52): register, memory and broadcast forms. $src1 is the
// tied accumulator (addend); the SDNode takes the two multiply operands
// first so tablegen can auto-generate commuted load patterns.
6264 let Constraints = "$src1 = $dst" in {
6265 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6266 OpndItins itins, X86VectorVTInfo _> {
6267 // NOTE: The SDNode have the multiply operands first with the add last.
6268 // This enables commuted load patterns to be autogenerated by tablegen.
6269 let ExeDomain = _.ExeDomain in {
6270 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6271 (ins _.RC:$src2, _.RC:$src3),
6272 OpcodeStr, "$src3, $src2", "$src2, $src3",
6273 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), itins.rr, 1, 1>,
6274 AVX512FMA3Base, Sched<[itins.Sched]>;
6276 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6277 (ins _.RC:$src2, _.MemOp:$src3),
6278 OpcodeStr, "$src3, $src2", "$src2, $src3",
6279 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6280 itins.rm>, AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6282 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6283 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6284 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6285 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6287 (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6288 _.RC:$src1), itins.rm>,
6289 AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6292 } // Constraints = "$src1 = $dst"
// IFMA across vector widths: 512-bit under HasIFMA; 128/256-bit also
// require HasVLX.
6294 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6295 OpndItins itins, AVX512VLVectorVTInfo _> {
6296 let Predicates = [HasIFMA] in {
6297 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>,
6298 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6300 let Predicates = [HasVLX, HasIFMA] in {
6301 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>,
6302 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6303 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>,
6304 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPMADD52LUQ/HUQ: 52-bit unsigned multiply, accumulating the low/high
// half of the product into 64-bit elements (VEX_W).
6308 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6309 SSE_PMADD, avx512vl_i64_info>, VEX_W;
6310 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6311 SSE_PMADD, avx512vl_i64_info>, VEX_W;
6313 //===----------------------------------------------------------------------===//
6314 // AVX-512 Scalar convert from sign integer to float/double
6315 //===----------------------------------------------------------------------===//
// Scalar int->fp convert: plain FRC forms (rr/rm, no patterns — matched
// via the Pat<> definitions below) and isCodeGenOnly _Int forms carrying
// the SDNode patterns with FROUND_CURRENT (dynamic rounding mode).
6317 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins,
6318 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6319 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6320 let hasSideEffects = 0 in {
6321 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6322 (ins DstVT.FRC:$src1, SrcRC:$src),
6323 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6324 itins.rr>, EVEX_4V, Sched<[itins.Sched]>;
6326 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6327 (ins DstVT.FRC:$src1, x86memop:$src),
6328 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), [],
6329 itins.rm>, EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6330 } // hasSideEffects = 0
6331 let isCodeGenOnly = 1 in {
6332 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6333 (ins DstVT.RC:$src1, SrcRC:$src2),
6334 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6335 [(set DstVT.RC:$dst,
6336 (OpNode (DstVT.VT DstVT.RC:$src1),
6338 (i32 FROUND_CURRENT)))], itins.rr>,
6339 EVEX_4V, Sched<[itins.Sched]>;
6341 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6342 (ins DstVT.RC:$src1, x86memop:$src2),
6343 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6344 [(set DstVT.RC:$dst,
6345 (OpNode (DstVT.VT DstVT.RC:$src1),
6346 (ld_frag addr:$src2),
6347 (i32 FROUND_CURRENT)))], itins.rm>,
6348 EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6349 }//isCodeGenOnly = 1
// Register-only int->fp convert with explicit static rounding control
// (AVX512RC:$rc operand, EVEX_B + EVEX_RC encoding).
6352 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins,
6353 RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> {
6354 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6355 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6357 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6358 [(set DstVT.RC:$dst,
6359 (OpNode (DstVT.VT DstVT.RC:$src1),
6361 (i32 imm:$rc)))], itins.rr>,
6362 EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Combines the plain and rounding-control int->fp convert multiclasses
// under a single name, all VEX_LIG (vector length ignored).
6365 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins,
6366 RegisterClass SrcRC, X86VectorVTInfo DstVT,
6367 X86MemOperand x86memop, PatFrag ld_frag, string asm> {
6368 defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>,
6369 avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop,
6370 ld_frag, asm>, VEX_LIG;
// Instantiations of signed and unsigned int->float/double scalar converts
// (32/64-bit GPR sources), assembler aliases for the ambiguous no-suffix
// spellings, and codegen patterns selecting them for [su]int_to_fp.
// NOTE(review): VCVTUSI2SDZ uses avx512_vcvtsi (no rounding form) while the
// others use avx512_vcvtsi_common — presumably because i32->f64 is exact
// and needs no rounding control; confirm against the Intel SDM.
6373 let Predicates = [HasAVX512] in {
6374 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32,
6375 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6376 XS, EVEX_CD8<32, CD8VT1>;
6377 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64,
6378 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6379 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6380 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32,
6381 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6382 XD, EVEX_CD8<32, CD8VT1>;
6383 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64,
6384 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6385 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6387 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6388 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6389 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6390 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6392 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6393 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6394 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
6395 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6396 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6397 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6398 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
6399 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6401 def : Pat<(f32 (sint_to_fp GR32:$src)),
6402 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6403 def : Pat<(f32 (sint_to_fp GR64:$src)),
6404 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6405 def : Pat<(f64 (sint_to_fp GR32:$src)),
6406 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6407 def : Pat<(f64 (sint_to_fp GR64:$src)),
6408 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6410 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32,
6411 v4f32x_info, i32mem, loadi32,
6412 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
6413 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64,
6414 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6415 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6416 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info,
6417 i32mem, loadi32, "cvtusi2sd{l}">,
6418 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
6419 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64,
6420 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6421 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6423 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6424 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6425 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6426 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6428 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6429 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6430 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6431 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6432 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6433 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6434 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6435 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6437 def : Pat<(f32 (uint_to_fp GR32:$src)),
6438 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6439 def : Pat<(f32 (uint_to_fp GR64:$src)),
6440 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6441 def : Pat<(f64 (uint_to_fp GR32:$src)),
6442 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6443 def : Pat<(f64 (uint_to_fp GR64:$src)),
6444 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6447 //===----------------------------------------------------------------------===//
6448 // AVX-512 Scalar convert from float/double to integer
6449 //===----------------------------------------------------------------------===//
// Scalar fp->int convert (intrinsic forms only): register with dynamic
// rounding (rr_Int), register with static rounding control (rrb_Int), and
// memory (rm_Int).
6451 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
6452 X86VectorVTInfo DstVT, SDNode OpNode,
6453 OpndItins itins, string asm> {
6454 let Predicates = [HasAVX512] in {
6455 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
6456 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6457 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))],
6458 itins.rr>, EVEX, VEX_LIG, Sched<[itins.Sched]>;
6459 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
6460 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6461 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))],
6462 itins.rr>, EVEX, VEX_LIG, EVEX_B, EVEX_RC,
6463 Sched<[itins.Sched]>;
6464 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
6465 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6466 [(set DstVT.RC:$dst, (OpNode
6467 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
6468 (i32 FROUND_CURRENT)))], itins.rm>,
6469 EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6470 } // Predicates = [HasAVX512]
6473 // Convert float/double to signed/unsigned int 32/64
// Opcode 0x2D = signed (cvtss2si/cvtsd2si); 0x79 = unsigned
// (cvtss2usi/cvtsd2usi). VEX_W on the forms with 64-bit integer results.
6474 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
6475 X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si">,
6476 XS, EVEX_CD8<32, CD8VT1>;
6477 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
6478 X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si">,
6479 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6480 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
6481 X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi">,
6482 XS, EVEX_CD8<32, CD8VT1>;
6483 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
6484 X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi">,
6485 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6486 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
6487 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6488 XD, EVEX_CD8<64, CD8VT1>;
6489 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
6490 X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si">,
6491 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6492 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
6493 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6494 XD, EVEX_CD8<64, CD8VT1>;
6495 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
6496 X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi">,
6497 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6499 // The SSE version of these instructions are disabled for AVX512.
6500 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
6501 let Predicates = [HasAVX512] in {
6502 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
6503 (VCVTSS2SIZrr_Int VR128X:$src)>;
6504 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
6505 (VCVTSS2SIZrm_Int sse_load_f32:$src)>;
6506 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
6507 (VCVTSS2SI64Zrr_Int VR128X:$src)>;
6508 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
6509 (VCVTSS2SI64Zrm_Int sse_load_f32:$src)>;
6510 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
6511 (VCVTSD2SIZrr_Int VR128X:$src)>;
6512 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
6513 (VCVTSD2SIZrm_Int sse_load_f64:$src)>;
6514 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
6515 (VCVTSD2SI64Zrr_Int VR128X:$src)>;
6516 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
6517 (VCVTSD2SI64Zrm_Int sse_load_f64:$src)>;
// Map the SSE/SSE2 int->fp scalar-convert intrinsics (and the AVX-512
// cvtusi2sd intrinsic) to the AVX-512 _Int instruction forms.
6520 let Predicates = [HasAVX512] in {
6521 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6522 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6523 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6524 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6525 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6526 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6527 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6528 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6529 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6530 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6531 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6532 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6533 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6534 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6535 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6536 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6537 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6538 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6539 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6540 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6541 } // Predicates = [HasAVX512]
6543 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6544 // which produce unnecessary vmovs{s,d} instructions
// Matching the movss/movsd + scalar_to_vector + sint_to_fp idiom directly
// to the _Int convert forms avoids the extra register-blend instruction.
6545 let Predicates = [HasAVX512] in {
6546 def : Pat<(v4f32 (X86Movss
6547 (v4f32 VR128X:$dst),
6548 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6549 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6551 def : Pat<(v4f32 (X86Movss
6552 (v4f32 VR128X:$dst),
6553 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6554 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6556 def : Pat<(v2f64 (X86Movsd
6557 (v2f64 VR128X:$dst),
6558 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6559 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6561 def : Pat<(v2f64 (X86Movsd
6562 (v2f64 VR128X:$dst),
6563 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6564 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6565 } // Predicates = [HasAVX512]
6567 // Convert float/double to signed/unsigned int 32/64 with truncation
// Emits: plain FRC forms (rr/rrb/rm) with fp_to_[su]int patterns, no-suffix
// assembler aliases (aliasStr carries the {l}/{q} suffix), and
// isCodeGenOnly _Int forms using the rounding SDNode (FROUND_CURRENT for
// rr/rm, FROUND_NO_EXC for the {sae} rrb form).
6568 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6569 X86VectorVTInfo _DstRC, SDNode OpNode,
6570 SDNode OpNodeRnd, OpndItins itins, string aliasStr>{
6571 let Predicates = [HasAVX512] in {
6572 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6573 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6574 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))], itins.rr>,
6575 EVEX, Sched<[itins.Sched]>;
6576 let hasSideEffects = 0 in
6577 def rrb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6578 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6579 [], itins.rr>, EVEX, EVEX_B, Sched<[itins.Sched]>;
6580 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
6581 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6582 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))],
6583 itins.rm>, EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>;
6585 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6586 (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6587 def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
6588 (!cast<Instruction>(NAME # "rrb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6589 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6590 (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
6591 _SrcRC.ScalarMemOp:$src), 0>;
6593 let isCodeGenOnly = 1 in {
6594 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6595 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6596 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6597 (i32 FROUND_CURRENT)))], itins.rr>,
6598 EVEX, VEX_LIG, Sched<[itins.Sched]>;
6599 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6600 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6601 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6602 (i32 FROUND_NO_EXC)))], itins.rr>,
6603 EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>;
6604 let mayLoad = 1, hasSideEffects = 0 in
6605 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6606 (ins _SrcRC.IntScalarMemOp:$src),
6607 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6608 [], itins.rm>, EVEX, VEX_LIG,
6609 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6610 } // isCodeGenOnly = 1
// Truncating scalar fp->int converts. Opcode 0x2C = signed (vcvttss2si /
// vcvttsd2si), 0x78 = unsigned (vcvttss2usi / vcvttsd2usi); the alias
// suffix string is {l} for i32 and {q} for i64 destinations.
6615 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
6616 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">,
6617 XS, EVEX_CD8<32, CD8VT1>;
6618 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
6619 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">,
6620 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
6621 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
6622 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">,
6623 XD, EVEX_CD8<64, CD8VT1>;
6624 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
6625 fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">,
6626 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6628 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
6629 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">,
6630 XS, EVEX_CD8<32, CD8VT1>;
6631 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
6632 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">,
6633 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
6634 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
6635 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">,
6636 XD, EVEX_CD8<64, CD8VT1>;
6637 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
6638 fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">,
6639 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Map the SSE/SSE2 truncating-convert intrinsics to the AVX-512 _Int forms.
// NOTE(review): the register patterns pass sse_load_f32/f64:$src straight
// through, while the memory patterns rewrite it to ssmem/sdmem:$src —
// verify this operand-class mismatch is intentional against the
// sse_load_f32/sse_load_f64 ComplexPattern definitions.
6640 let Predicates = [HasAVX512] in {
6641 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
6642 (VCVTTSS2SIZrr_Int VR128X:$src)>;
6643 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6644 (VCVTTSS2SIZrm_Int ssmem:$src)>;
6645 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
6646 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
6647 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6648 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
6649 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
6650 (VCVTTSD2SIZrr_Int VR128X:$src)>;
6651 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6652 (VCVTTSD2SIZrm_Int sdmem:$src)>;
6653 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
6654 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
6655 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6656 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
6659 //===----------------------------------------------------------------------===//
6660 // AVX-512 Convert form float to double and back
6661 //===----------------------------------------------------------------------===//
// Scalar fp->fp convert (ss<->sd): masked _Int register/memory forms with
// FROUND_CURRENT, plus isCodeGenOnly plain FRC forms with no patterns.
6663 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6664 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> {
6665 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6666 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6667 "$src2, $src1", "$src1, $src2",
6668 (_.VT (OpNode (_.VT _.RC:$src1),
6669 (_Src.VT _Src.RC:$src2),
6670 (i32 FROUND_CURRENT))), itins.rr>,
6671 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6672 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6673 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
6674 "$src2, $src1", "$src1, $src2",
6675 (_.VT (OpNode (_.VT _.RC:$src1),
6676 (_Src.VT _Src.ScalarIntMemCPat:$src2),
6677 (i32 FROUND_CURRENT))), itins.rm>,
6679 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6681 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6682 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6683 (ins _.FRC:$src1, _Src.FRC:$src2),
6684 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6685 itins.rr>, EVEX_4V, VEX_LIG, Sched<[itins.Sched]>;
6687 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6688 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
6689 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
6690 itins.rm>, EVEX_4V, VEX_LIG,
6691 Sched<[itins.Sched.Folded, ReadAfterLd]>;
6695 // Scalar Coversion with SAE - suppress all exceptions
// Register-only {sae} form: uses FROUND_NO_EXC in the pattern and EVEX_B
// encoding.
6696 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6697 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6698 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6699 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6700 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
6701 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6702 (_Src.VT _Src.RC:$src2),
6703 (i32 FROUND_NO_EXC))), itins.rr>,
6704 EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>;
6707 // Scalar Conversion with rounding control (RC)
// Register-only static-rounding form: takes AVX512RC:$rc and feeds it to
// the rounding SDNode as an immediate.
6708 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6709 X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> {
6710 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6711 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6712 "$rc, $src2, $src1", "$src1, $src2, $rc",
6713 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6714 (_Src.VT _Src.RC:$src2), (i32 imm:$rc))),
6716 EVEX_4V, VEX_LIG, Sched<[itins.Sched]>,
// f64->f32 narrowing convert: pairs the plain scalar convert with the
// rounding-control (RC) form — narrowing can round, so RC applies.
6719 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
6720 SDNode OpNodeRnd, OpndItins itins,
6721 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6722 let Predicates = [HasAVX512] in {
6723 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6724 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
6725 OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// f32->f64 widening convert: pairs the plain scalar convert with the {sae}
// form — widening is exact, so only exception suppression applies, not RC.
6729 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
6730 SDNode OpNodeRnd, OpndItins itins,
6731 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
6732 let Predicates = [HasAVX512] in {
6733 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6734 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>,
6735 EVEX_CD8<32, CD8VT1>, XS;
// Instantiate the scalar fp<->fp converts. Both share opcode 0x5A and are
// distinguished by the XD/XS prefixes applied in the multiclasses above.
6738 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
6739 X86froundRnd, SSE_CVT_SD2SS, f64x_info,
6740 f32x_info>, NotMemoryFoldable;
6741 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6742 X86fpextRnd, SSE_CVT_SS2SD, f32x_info,
6743 f64x_info>, NotMemoryFoldable;
// Codegen patterns selecting the scalar converts for generic fpextend /
// fpround. extloadf32 folds the load only when optimizing for size; for
// speed it goes through a separate VMOVSSZrm + register convert.
6745 def : Pat<(f64 (fpextend FR32X:$src)),
6746 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
6747 Requires<[HasAVX512]>;
6748 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6749 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6750 Requires<[HasAVX512]>;
6752 def : Pat<(f64 (extloadf32 addr:$src)),
6753 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6754 Requires<[HasAVX512, OptForSize]>;
6756 def : Pat<(f64 (extloadf32 addr:$src)),
6757 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
6758 Requires<[HasAVX512, OptForSpeed]>;
6760 def : Pat<(f32 (fpround FR64X:$src)),
6761 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
6762 Requires<[HasAVX512]>;
// Fold the movss/movsd blend + scalar convert idiom into the _Int forms,
// avoiding a separate blend instruction.
6764 def : Pat<(v4f32 (X86Movss
6765 (v4f32 VR128X:$dst),
6766 (v4f32 (scalar_to_vector
6767 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6768 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
6769 Requires<[HasAVX512]>;
6771 def : Pat<(v2f64 (X86Movsd
6772 (v2f64 VR128X:$dst),
6773 (v2f64 (scalar_to_vector
6774 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6775 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
6776 Requires<[HasAVX512]>;
6778 //===----------------------------------------------------------------------===//
6779 // AVX-512 Vector convert from signed/unsigned integer to float/double
6780 // and from float/double to signed/unsigned integer
6781 //===----------------------------------------------------------------------===//
// Packed convert skeleton: register (rr), full-vector memory (rm), and
// scalar-broadcast memory (rmb) forms. Broadcast/Alias/MemOp parameters let
// instantiations override the broadcast string, mnemonic suffix, and memory
// operand (used e.g. by the half-width cvtps2pd variants below).
6783 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6784 X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins,
6785 string Broadcast = _.BroadcastStr,
6786 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
6788 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6789 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
6790 (_.VT (OpNode (_Src.VT _Src.RC:$src))), itins.rr>,
6791 EVEX, Sched<[itins.Sched]>;
6793 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6794 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
6795 (_.VT (OpNode (_Src.VT
6796 (bitconvert (_Src.LdFrag addr:$src))))), itins.rm>,
6797 EVEX, Sched<[itins.Sched.Folded]>;
6799 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6800 (ins _Src.ScalarMemOp:$src), OpcodeStr,
6801 "${src}"##Broadcast, "${src}"##Broadcast,
6802 (_.VT (OpNode (_Src.VT
6803 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
6804 )), itins.rm>, EVEX, EVEX_B,
6805 Sched<[itins.Sched.Folded]>;
6807 // Conversion with SAE - suppress all exceptions
// Register-only SAE variant: the rrb form sets EVEX.b to suppress all FP
// exceptions and feeds FROUND_NO_EXC to the rounded SDNode.
6808 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6809 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6811 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6812 (ins _Src.RC:$src), OpcodeStr,
6813 "{sae}, $src", "$src, {sae}",
6814 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
6815 (i32 FROUND_NO_EXC))), itins.rr>,
6816 EVEX, EVEX_B, Sched<[itins.Sched]>;
6819 // Conversion with rounding control (RC)
// Register-only static-rounding variant: EVEX.b plus an explicit rounding
// control immediate ($rc) passed through to the rounded SDNode.
6820 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6821 X86VectorVTInfo _Src, SDNode OpNodeRnd,
6823 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6824 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6825 "$rc, $src", "$src, $rc",
6826 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc))),
6827 itins.rr>, EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
6830 // Extend Float to Double
6831 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
6833 let Predicates = [HasAVX512] in {
6834 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
6836 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
6837 X86vfpextRnd, itins>, EVEX_V512;
6839 let Predicates = [HasVLX] in {
6840 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
6841 X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128;
6842 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
6847 // Truncate Double to Float
6848 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> {
// 512-bit form, with rounding-control variant.
6849 let Predicates = [HasAVX512] in {
6850 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>,
6851 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
6852 X86vfproundRnd, itins>, EVEX_V512;
// 128/256-bit forms. Both produce a v4f32 destination, so the "x"/"y"
// mnemonic suffixes (and explicit broadcast strings) disambiguate the
// memory forms for the assembler.
6854 let Predicates = [HasVLX] in {
6855 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
6856 X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128;
6857 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
6858 itins, "{1to4}", "{y}">, EVEX_V256;
// Accept the suffixed spellings for the register forms as aliases too.
6860 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6861 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6862 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6863 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6864 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6865 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6866 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6867 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
6871 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>,
6872 VEX_W, PD, EVEX_CD8<64, CD8VF>;
6873 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>,
6874 PS, EVEX_CD8<32, CD8VH>;
6876 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6877 (VCVTPS2PDZrm addr:$src)>;
6879 let Predicates = [HasVLX] in {
6880 let AddedComplexity = 15 in {
6881 def : Pat<(X86vzmovl (v2f64 (bitconvert
6882 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6883 (VCVTPD2PSZ128rr VR128X:$src)>;
6884 def : Pat<(X86vzmovl (v2f64 (bitconvert
6885 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
6886 (VCVTPD2PSZ128rm addr:$src)>;
6888 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6889 (VCVTPS2PDZ128rm addr:$src)>;
6890 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6891 (VCVTPS2PDZ256rm addr:$src)>;
6894 // Convert Signed/Unsigned Doubleword to Double
6895 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
6896 SDNode OpNode128, OpndItins itins> {
6897 // No rounding in this op
6898 let Predicates = [HasAVX512] in
6899 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
6902 let Predicates = [HasVLX] in {
6903 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
6904 OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128;
6905 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
6910 // Convert Signed/Unsigned Doubleword to Float
6911 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
6912 SDNode OpNodeRnd, OpndItins itins> {
6913 let Predicates = [HasAVX512] in
6914 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
6916 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
6917 OpNodeRnd, itins>, EVEX_V512;
6919 let Predicates = [HasVLX] in {
6920 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
6922 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
6927 // Convert Float to Signed/Unsigned Doubleword with truncation
6928 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6929 SDNode OpNodeRnd, OpndItins itins> {
6930 let Predicates = [HasAVX512] in {
6931 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
6933 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
6934 OpNodeRnd, itins>, EVEX_V512;
6936 let Predicates = [HasVLX] in {
6937 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
6939 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
6944 // Convert Float to Signed/Unsigned Doubleword
6945 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6946 SDNode OpNodeRnd, OpndItins itins> {
6947 let Predicates = [HasAVX512] in {
6948 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
6950 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
6951 OpNodeRnd, itins>, EVEX_V512;
6953 let Predicates = [HasVLX] in {
6954 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
6956 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
6961 // Convert Double to Signed/Unsigned Doubleword with truncation
6962 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6963 SDNode OpNode128, SDNode OpNodeRnd,
6965 let Predicates = [HasAVX512] in {
6966 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
6968 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
6969 OpNodeRnd, itins>, EVEX_V512;
6971 let Predicates = [HasVLX] in {
6972 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
6973 // memory forms of these instructions in Asm Parser. They have the same
6974 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
6975 // due to the same reason.
6976 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
6977 OpNode128, itins, "{1to2}", "{x}">, EVEX_V128;
6978 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
6979 itins, "{1to4}", "{y}">, EVEX_V256;
6981 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6982 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6983 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6984 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
6985 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6986 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6987 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6988 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
6992 // Convert Double to Signed/Unsigned Doubleword
6993 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
6994 SDNode OpNodeRnd, OpndItins itins> {
6995 let Predicates = [HasAVX512] in {
6996 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
6998 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
6999 OpNodeRnd, itins>, EVEX_V512;
7001 let Predicates = [HasVLX] in {
7002 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7003 // memory forms of these instructions in Asm Parser. They have the same
7004 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7005 // due to the same reason.
7006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7007 itins, "{1to2}", "{x}">, EVEX_V128;
7008 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7009 itins, "{1to4}", "{y}">, EVEX_V256;
7011 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7012 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7013 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7014 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7015 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7016 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7017 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7018 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
7022 // Convert Double to Signed/Unsigned Quadword
7023 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7024 SDNode OpNodeRnd, OpndItins itins> {
7025 let Predicates = [HasDQI] in {
7026 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7028 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7029 OpNodeRnd,itins>, EVEX_V512;
7031 let Predicates = [HasDQI, HasVLX] in {
7032 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7034 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7039 // Convert Double to Signed/Unsigned Quadword with truncation
7040 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7041 SDNode OpNodeRnd, OpndItins itins> {
7042 let Predicates = [HasDQI] in {
7043 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
7045 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7046 OpNodeRnd, itins>, EVEX_V512;
7048 let Predicates = [HasDQI, HasVLX] in {
7049 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
7051 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
7056 // Convert Signed/Unsigned Quadword to Double
7057 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7058 SDNode OpNodeRnd, OpndItins itins> {
7059 let Predicates = [HasDQI] in {
7060 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
7062 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7063 OpNodeRnd, itins>, EVEX_V512;
7065 let Predicates = [HasDQI, HasVLX] in {
7066 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
7068 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
7073 // Convert Float to Signed/Unsigned Quadword
7074 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7075 SDNode OpNodeRnd, OpndItins itins> {
7076 let Predicates = [HasDQI] in {
7077 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7079 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7080 OpNodeRnd, itins>, EVEX_V512;
7082 let Predicates = [HasDQI, HasVLX] in {
7083 // Explicitly specified broadcast string, since we take only 2 elements
7084 // from v4f32x_info source
7085 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7086 itins, "{1to2}", "", f64mem>, EVEX_V128;
7087 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7092 // Convert Float to Signed/Unsigned Quadword with truncation
7093 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7094 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7095 let Predicates = [HasDQI] in {
7096 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
7098 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7099 OpNodeRnd, itins>, EVEX_V512;
7101 let Predicates = [HasDQI, HasVLX] in {
7102 // Explicitly specified broadcast string, since we take only 2 elements
7103 // from v4f32x_info source
7104 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
7105 itins, "{1to2}", "", f64mem>, EVEX_V128;
7106 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
7111 // Convert Signed/Unsigned Quadword to Float
7112 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7113 SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> {
7114 let Predicates = [HasDQI] in {
7115 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
7117 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7118 OpNodeRnd, itins>, EVEX_V512;
7120 let Predicates = [HasDQI, HasVLX] in {
7121 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7122 // memory forms of these instructions in Asm Parser. They have the same
7123 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7124 // due to the same reason.
7125 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7126 itins, "{1to2}", "{x}">, EVEX_V128;
7127 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7128 itins, "{1to4}", "{y}">, EVEX_V256;
7130 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7131 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7132 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7133 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7134 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7135 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7136 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7137 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7141 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP,
7142 SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>;
7144 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7145 X86VSintToFpRnd, SSE_CVT_I2PS>,
7146 PS, EVEX_CD8<32, CD8VF>;
7148 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
7149 X86cvttp2siRnd, SSE_CVT_PS2I>,
7150 XS, EVEX_CD8<32, CD8VF>;
7152 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
7153 X86cvttp2siRnd, SSE_CVT_PD2I>,
7154 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7156 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
7157 X86cvttp2uiRnd, SSE_CVT_PS2I>, PS,
7158 EVEX_CD8<32, CD8VF>;
7160 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
7161 X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>,
7162 PS, VEX_W, EVEX_CD8<64, CD8VF>;
7164 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp,
7165 X86VUintToFP, SSE_CVT_I2PD>, XS,
7166 EVEX_CD8<32, CD8VH>;
7168 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7169 X86VUintToFpRnd, SSE_CVT_I2PS>, XD,
7170 EVEX_CD8<32, CD8VF>;
7172 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7173 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7174 EVEX_CD8<32, CD8VF>;
7176 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
7177 X86cvtp2IntRnd, SSE_CVT_PD2I>, XD,
7178 VEX_W, EVEX_CD8<64, CD8VF>;
7180 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
7181 X86cvtp2UIntRnd, SSE_CVT_PS2I>,
7182 PS, EVEX_CD8<32, CD8VF>;
7184 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
7185 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7186 PS, EVEX_CD8<64, CD8VF>;
7188 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
7189 X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W,
7190 PD, EVEX_CD8<64, CD8VF>;
7192 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
7193 X86cvtp2IntRnd, SSE_CVT_PS2I>, PD,
7194 EVEX_CD8<32, CD8VH>;
7196 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
7197 X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W,
7198 PD, EVEX_CD8<64, CD8VF>;
7200 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
7201 X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD,
7202 EVEX_CD8<32, CD8VH>;
7204 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
7205 X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W,
7206 PD, EVEX_CD8<64, CD8VF>;
7208 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
7209 X86cvttp2siRnd, SSE_CVT_PS2I>, PD,
7210 EVEX_CD8<32, CD8VH>;
7212 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
7213 X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W,
7214 PD, EVEX_CD8<64, CD8VF>;
7216 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
7217 X86cvttp2uiRnd, SSE_CVT_PS2I>, PD,
7218 EVEX_CD8<32, CD8VH>;
7220 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
7221 X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7222 EVEX_CD8<64, CD8VF>;
7224 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
7225 X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS,
7226 EVEX_CD8<64, CD8VF>;
7228 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
7229 X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS,
7230 EVEX_CD8<64, CD8VF>;
7232 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
7233 X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD,
7234 EVEX_CD8<64, CD8VF>;
7236 let Predicates = [HasAVX512, NoVLX] in {
7237 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
7238 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7239 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7240 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7242 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7243 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7244 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7245 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7247 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7248 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7249 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7250 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7252 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7253 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7254 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7255 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7257 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7258 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7259 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7260 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7262 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7263 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7264 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7265 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7267 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
7268 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7269 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7270 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7273 let Predicates = [HasAVX512, HasVLX] in {
7274 let AddedComplexity = 15 in {
7275 def : Pat<(X86vzmovl (v2i64 (bitconvert
7276 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
7277 (VCVTPD2DQZ128rr VR128X:$src)>;
7278 def : Pat<(X86vzmovl (v2i64 (bitconvert
7279 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
7280 (VCVTPD2DQZ128rm addr:$src)>;
7281 def : Pat<(X86vzmovl (v2i64 (bitconvert
7282 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
7283 (VCVTPD2UDQZ128rr VR128X:$src)>;
7284 def : Pat<(X86vzmovl (v2i64 (bitconvert
7285 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
7286 (VCVTTPD2DQZ128rr VR128X:$src)>;
7287 def : Pat<(X86vzmovl (v2i64 (bitconvert
7288 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
7289 (VCVTTPD2DQZ128rm addr:$src)>;
7290 def : Pat<(X86vzmovl (v2i64 (bitconvert
7291 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
7292 (VCVTTPD2UDQZ128rr VR128X:$src)>;
7295 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7296 (VCVTDQ2PDZ128rm addr:$src)>;
7297 def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7298 (VCVTDQ2PDZ128rm addr:$src)>;
7300 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
7301 (VCVTUDQ2PDZ128rm addr:$src)>;
7302 def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
7303 (VCVTUDQ2PDZ128rm addr:$src)>;
7306 let Predicates = [HasAVX512] in {
7307 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7308 (VCVTPD2PSZrm addr:$src)>;
7309 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7310 (VCVTPS2PDZrm addr:$src)>;
7313 let Predicates = [HasDQI, HasVLX] in {
7314 let AddedComplexity = 15 in {
7315 def : Pat<(X86vzmovl (v2f64 (bitconvert
7316 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
7317 (VCVTQQ2PSZ128rr VR128X:$src)>;
7318 def : Pat<(X86vzmovl (v2f64 (bitconvert
7319 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
7320 (VCVTUQQ2PSZ128rr VR128X:$src)>;
7324 let Predicates = [HasDQI, NoVLX] in {
7325 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7326 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7327 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7328 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7330 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7331 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7332 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7333 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7335 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7336 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7337 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7338 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7340 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7341 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7342 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7343 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7345 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7346 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7347 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7348 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7350 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7351 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7352 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7353 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7355 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7356 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7357 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7358 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7360 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7361 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7362 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7363 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7365 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7366 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7367 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7368 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7370 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7371 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7372 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7373 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7375 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7376 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7377 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7378 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7380 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7381 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7382 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7383 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7386 //===----------------------------------------------------------------------===//
7387 // Half precision conversion instructions
7388 //===----------------------------------------------------------------------===//
// Half->single conversion (vcvtph2ps): masked register and memory forms.
// The memory form loads via ld_frag and bitconverts to the i16 source
// vector type before the X86cvtph2ps node.
7390 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7391 X86MemOperand x86memop, PatFrag ld_frag,
7393 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
7394 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
7395 (X86cvtph2ps (_src.VT _src.RC:$src)),itins.rr>,
7396 T8PD, Sched<[itins.Sched]>;
7397 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
7398 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
7399 (X86cvtph2ps (_src.VT
7401 (ld_frag addr:$src)))), itins.rm>,
7402 T8PD, Sched<[itins.Sched.Folded]>;
// SAE form of vcvtph2ps: register-only, EVEX.b set, FROUND_NO_EXC passed
// to the rounded node.
7405 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7407 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
7408 (ins _src.RC:$src), "vcvtph2ps",
7409 "{sae}, $src", "$src, {sae}",
7410 (X86cvtph2psRnd (_src.VT _src.RC:$src),
7411 (i32 FROUND_NO_EXC)), itins.rr>,
7412 T8PD, EVEX_B, Sched<[itins.Sched]>;
7415 let Predicates = [HasAVX512] in
7416 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64,
7418 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>,
7419 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7421 let Predicates = [HasVLX] in {
7422 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
7423 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256,
7424 EVEX_CD8<32, CD8VH>;
7425 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
7426 loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128,
7427 EVEX_CD8<32, CD8VH>;
7429 // Pattern match vcvtph2ps of a scalar i64 load.
7430 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzmovl_v2i64 addr:$src)))),
7431 (VCVTPH2PSZ128rm addr:$src)>;
7432 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (vzload_v2i64 addr:$src)))),
7433 (VCVTPH2PSZ128rm addr:$src)>;
7434 def : Pat<(v4f32 (X86cvtph2ps (v8i16 (bitconvert
7435 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
7436 (VCVTPH2PSZ128rm addr:$src)>;
// Single->half conversion (vcvtps2ph). The register form is masked and
// pattern-matched; the store forms (mr, masked mrk) carry no patterns and
// are selected via the explicit Pat defs later in this file.
7439 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7440 X86MemOperand x86memop, OpndItins itins> {
7441 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
7442 (ins _src.RC:$src1, i32u8imm:$src2),
7443 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
7444 (X86cvtps2ph (_src.VT _src.RC:$src1),
7446 itins.rr, 0, 0>, AVX512AIi8Base, Sched<[itins.Sched]>;
// Store forms: no patterns, no modeled side effects beyond the store.
7447 let hasSideEffects = 0, mayStore = 1 in {
7448 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7449 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7450 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7451 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Merge-masked store form (EVEX.k).
7452 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7453 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7454 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
7455 [], itins.rm>, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// SAE form of vcvtps2ph: assembler-only (maskable_in_asm, empty pattern),
// register destination, EVEX.b set.
7459 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7461 let hasSideEffects = 0 in
7462 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
7463 (outs _dest.RC:$dst),
7464 (ins _src.RC:$src1, i32u8imm:$src2),
7465 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
7466 [], itins.rr>, EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>;
7469 let Predicates = [HasAVX512] in {
7470 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
7472 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info,
7473 SSE_CVT_PS2PH>, EVEX, EVEX_V512,
7474 EVEX_CD8<32, CD8VH>;
7475 let Predicates = [HasVLX] in {
7476 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
7477 SSE_CVT_PS2PH>, EVEX, EVEX_V256,
7478 EVEX_CD8<32, CD8VH>;
7479 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
7480 SSE_CVT_PS2PH>, EVEX, EVEX_V128,
7481 EVEX_CD8<32, CD8VH>;
7484 def : Pat<(store (f64 (extractelt
7485 (bc_v2f64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7486 (iPTR 0))), addr:$dst),
7487 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7488 def : Pat<(store (i64 (extractelt
7489 (bc_v2i64 (v8i16 (X86cvtps2ph VR128X:$src1, i32:$src2))),
7490 (iPTR 0))), addr:$dst),
7491 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, imm:$src2)>;
7492 def : Pat<(store (v8i16 (X86cvtps2ph VR256X:$src1, i32:$src2)), addr:$dst),
7493 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, imm:$src2)>;
7494 def : Pat<(store (v16i16 (X86cvtps2ph VR512:$src1, i32:$src2)), addr:$dst),
7495 (VCVTPS2PHZmr addr:$dst, VR512:$src1, imm:$src2)>;
7498 // Patterns for matching conversions from float to half-float and vice versa.
7499 let Predicates = [HasVLX] in {
7500 // Use MXCSR.RC for rounding instead of explicitly specifying the default
7501 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7502 // configurations we support (the default). However, falling back to MXCSR is
7503 // more consistent with other instructions, which are always controlled by it.
7504 // It's encoded as 0b100.
7505 def : Pat<(fp_to_f16 FR32X:$src),
7506 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7507 (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
7509 def : Pat<(f16_to_fp GR16:$src),
7510 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7511 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
7513 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7514 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7515 (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7518 // Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// Scalar compare with SAE (vcomis[sd]/vucomis[sd] {sae}): assembler-only
// (empty pattern, hasSideEffects = 0); the EFLAGS def comes from the
// `let Defs = [EFLAGS]` at the instantiation site below.
7519 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
7520 string OpcodeStr, OpndItins itins> {
7521 let hasSideEffects = 0 in
7522 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
7523 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7524 [], itins.rr>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
7525 Sched<[itins.Sched]>;
7528 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7529 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>,
7530 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7531 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>,
7532 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7533 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>,
7534 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7535 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>,
7536 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7539 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7540 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
7541 "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7542 EVEX_CD8<32, CD8VT1>;
7543 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
7544 "ucomisd", SSE_COMIS>, PD, EVEX,
7545 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7546 let Pattern = []<dag> in {
7547 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
7548 "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7549 EVEX_CD8<32, CD8VT1>;
7550 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
7551 "comisd", SSE_COMIS>, PD, EVEX,
7552 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7554 let isCodeGenOnly = 1 in {
7555 defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7556 sse_load_f32, "ucomiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7557 EVEX_CD8<32, CD8VT1>;
7558 defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7559 sse_load_f64, "ucomisd", SSE_COMIS>, PD, EVEX,
7560 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7562 defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7563 sse_load_f32, "comiss", SSE_COMIS>, PS, EVEX, VEX_LIG,
7564 EVEX_CD8<32, CD8VT1>;
7565 defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7566 sse_load_f64, "comisd", SSE_COMIS>, PD, EVEX,
7567 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7571 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision reciprocal/rsqrt approximation (rcp14ss/sd,
// rsqrt14ss/sd): masked rr and rm forms. $src1 supplies the passthrough
// upper elements, $src2 is the operand being approximated.
7572 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7573 OpndItins itins, X86VectorVTInfo _> {
7574 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
7575 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7576 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7577 "$src2, $src1", "$src1, $src2",
7578 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
7579 EVEX_4V, Sched<[itins.Sched]>;
// Memory form folds the scalar load via the intrinsic-memory pattern.
7580 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7581 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7582 "$src2, $src1", "$src1, $src2",
7583 (OpNode (_.VT _.RC:$src1),
7584 _.ScalarIntMemCPat:$src2), itins.rm>, EVEX_4V,
7585 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiations of avx512_fp14_s. NotMemoryFoldable: keep these out of the
// automatic memory-folding tables (the rm form is defined explicitly above).
7589 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>,
7590                 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7591 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>,
7592                 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7593 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>,
7594                 EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable;
7595 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>,
7596                 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable;
7598 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximation ops: reg (r), full-vector load (m),
// and broadcast-from-scalar (mb, EVEX_B) maskable forms.
7599 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
7600                          OpndItins itins, X86VectorVTInfo _> {
7601   let ExeDomain = _.ExeDomain in {
7602   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7603                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7604                          (_.FloatVT (OpNode _.RC:$src)), itins.rr>, EVEX, T8PD,
7605                          Sched<[itins.Sched]>;
7606   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7607                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7609                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX, T8PD,
7610                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7611   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7612                           (ins _.ScalarMemOp:$src), OpcodeStr,
7613                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7615                                   (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7616                           EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_fp14_p for every vector width: unconditional 512-bit
// PS/PD forms, plus 128/256-bit forms gated on the AVX512VL predicate.
7620 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
7622   defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s,
7623                           v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
7624   defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d,
7625                           v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7627   // Define only if AVX512VL feature is present.
7628   let Predicates = [HasVLX] in {
7629     defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7630                                 OpNode, itins.s, v4f32x_info>,
7631                                 EVEX_V128, EVEX_CD8<32, CD8VF>;
7632     defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7633                                 OpNode, itins.s, v8f32x_info>,
7634                                 EVEX_V256, EVEX_CD8<32, CD8VF>;
7635     defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7636                                 OpNode, itins.d, v2f64x_info>,
7637                                 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
7638     defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7639                                 OpNode, itins.d, v4f64x_info>,
7640                                 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed VRSQRT14 / VRCP14 across all vector widths.
7644 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>;
7645 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>;
7647 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision (ERI) approximation ops. Three forms: rr with
// current rounding (FROUND_CURRENT), rb with exceptions suppressed
// ({sae}/FROUND_NO_EXC, EVEX_B), and a memory-folded m form.
7648 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7649                          SDNode OpNode, OpndItins itins> {
7650   let ExeDomain = _.ExeDomain in {
7651   defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7652                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7653                            "$src2, $src1", "$src1, $src2",
7654                            (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7655                            (i32 FROUND_CURRENT)), itins.rr>,
7656                            Sched<[itins.Sched]>;
7658   defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7659                             (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7660                             "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7661                             (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7662                             (i32 FROUND_NO_EXC)), itins.rm>, EVEX_B,
7663                             Sched<[itins.Sched]>;
7665   defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7666                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7667                          "$src2, $src1", "$src1, $src2",
7668                          (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
7669                          (i32 FROUND_CURRENT)), itins.rm>,
7670                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Wraps avx512_fp28_s into SS (f32) and SD (f64) variants with the
// appropriate EVEX compressed-displacement scale and VEX_W for 64-bit.
7674 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7676   defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>,
7677                EVEX_CD8<32, CD8VT1>;
7678   defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>,
7679                EVEX_CD8<64, CD8VT1>, VEX_W;
// Scalar ERI instantiations, gated on the HasERI feature; VGETEXP is
// available on plain AVX-512 (outside the HasERI block).
7682 let Predicates = [HasERI] in {
7683 defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>,
7685 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>,
7689 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>,
7691 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision (ERI) ops with current rounding: reg (r),
// full-vector load (m), and broadcast (mb, EVEX_B) maskable forms.
// The {sae} variant lives in avx512_fp28_p_round below.
7693 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7694                          SDNode OpNode, OpndItins itins> {
7695   let ExeDomain = _.ExeDomain in {
7696   defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7697                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7698                          (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT)),
7699                          itins.rr>, Sched<[itins.Sched]>;
7701   defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7702                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7704                              (bitconvert (_.LdFrag addr:$src))),
7705                           (i32 FROUND_CURRENT)), itins.rm>,
7706                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7708   defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7709                          (ins _.ScalarMemOp:$src), OpcodeStr,
7710                          "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7712                                   (X86VBroadcast (_.ScalarLdFrag addr:$src))),
7713                                  (i32 FROUND_CURRENT)), itins.rm>, EVEX_B,
7714                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Suppress-all-exceptions ({sae}) register form of the packed ERI ops;
// EVEX_B here selects SAE semantics rather than broadcast.
7717 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7718                                SDNode OpNode, OpndItins itins> {
7719   let ExeDomain = _.ExeDomain in
7720   defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7721                          (ins _.RC:$src), OpcodeStr,
7722                          "{sae}, $src", "$src, {sae}",
7723                          (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC)),
7724                          itins.rr>, EVEX_B, Sched<[itins.Sched]>;
// 512-bit packed ERI ops: combines the current-rounding forms and the {sae}
// form for both PS (f32) and PD (f64).
7727 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
7729   defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7730             avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>,
7731             T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
7732   defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7733             avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>,
7734             T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128/256-bit (VLX) packed forms of an ERI-style unary op; no {sae} variant
// at these widths. Used below to extend VGETEXP to sub-512-bit vectors.
7737 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
7738                                   SDNode OpNode, SizeItins itins> {
7739   // Define only if AVX512VL feature is present.
7740   let Predicates = [HasVLX] in {
7741     defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>,
7742                                      EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
7743     defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>,
7744                                      EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
7745     defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>,
7746                                      EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7747     defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>,
7748                                      EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed ERI instantiations (HasERI); VGETEXP is plain AVX-512 and also gets
// the VLX 128/256-bit widths via avx512_fp_unaryop_packed.
7751 let Predicates = [HasERI] in {
7753  defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX;
7754  defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX;
7755  defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX;
7757 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>,
7758                  avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd,
7759                                           SSE_ALU_ITINS_P>, EVEX;
// Packed square root with an explicit static rounding-mode operand ($rc);
// EVEX_B + EVEX_RC encode the embedded rounding control.
7761 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins,
7763   let ExeDomain = _.ExeDomain in
7764   defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7765                          (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
7766                          (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc))), itins.rr>,
7767                          EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
// Packed square root under the current rounding mode, selected from the
// generic fsqrt node: reg (r), load (m), and broadcast (mb) maskable forms.
7770 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins,
7772   let ExeDomain = _.ExeDomain in {
7773   defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7774                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
7775                          (_.FloatVT (fsqrt _.RC:$src)), itins.rr>, EVEX,
7776                          Sched<[itins.Sched]>;
7777   defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7778                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7780                            (bitconvert (_.LdFrag addr:$src)))), itins.rm>, EVEX,
7781                            Sched<[itins.Sched.Folded, ReadAfterLd]>;
7782   defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7783                           (ins _.ScalarMemOp:$src), OpcodeStr,
7784                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7786                                   (X86VBroadcast (_.ScalarLdFrag addr:$src)))), itins.rm>,
7787                           EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates avx512_sqrt_packed for all widths: 512-bit unconditionally,
// 128/256-bit gated on AVX512VL.
7791 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> {
7792   defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>,
7793                                 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7794   defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>,
7795                                 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7796   // Define only if AVX512VL feature is present.
7797   let Predicates = [HasVLX] in {
7798     defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7799                                      SSE_SQRTPS, v4f32x_info>,
7800                                      EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7801     defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7802                                      SSE_SQRTPS, v8f32x_info>,
7803                                      EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7804     defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7805                                      SSE_SQRTPD, v2f64x_info>,
7806                                      EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7807     defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7808                                      SSE_SQRTPD, v4f64x_info>,
7809                                      EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Embedded-rounding ($rc) packed sqrt forms; 512-bit only, since static
// rounding is an EVEX.512 feature.
7813 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> {
7814   defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS,
7815                                 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7816   defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD,
7817                                 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar square root. Defines the intrinsic forms (r_Int/m_Int/rb_Int via
// X86fsqrtRnds), codegen-only plain-FR register forms (r/m, no patterns —
// hasSideEffects = 0 so they can be scheduled freely), and selection
// patterns mapping fsqrt / the SS/SD intrinsic onto those instructions.
// Load folding of plain fsqrt is restricted to OptForSize.
7820 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins,
7821                               X86VectorVTInfo _, string SUFF, Intrinsic Intr> {
7822   let ExeDomain = _.ExeDomain in {
7823   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7824                          (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7825                          "$src2, $src1", "$src1, $src2",
7826                          (X86fsqrtRnds (_.VT _.RC:$src1),
7828                                     (i32 FROUND_CURRENT)), itins.rr>,
7829                          Sched<[itins.Sched]>;
7830   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7831                          (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
7832                          "$src2, $src1", "$src1, $src2",
7833                          (X86fsqrtRnds (_.VT _.RC:$src1),
7834                                     _.ScalarIntMemCPat:$src2,
7835                                     (i32 FROUND_CURRENT)), itins.rm>,
7836                          Sched<[itins.Sched.Folded, ReadAfterLd]>;
7837   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7838                          (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7839                          "$rc, $src2, $src1", "$src1, $src2, $rc",
7840                          (X86fsqrtRnds (_.VT _.RC:$src1),
7842                                      (i32 imm:$rc)), itins.rr>,
7843                          EVEX_B, EVEX_RC, Sched<[itins.Sched]>;
7845   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7846   def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7847             (ins _.FRC:$src1, _.FRC:$src2),
7848             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rr>,
7849             Sched<[itins.Sched]>;
7851   def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7852             (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7853             OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], itins.rm>,
7854             Sched<[itins.Sched.Folded, ReadAfterLd]>;
7858   let Predicates = [HasAVX512] in {
7859     def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
7860               (!cast<Instruction>(NAME#SUFF#Zr)
7861                   (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
7863     def : Pat<(Intr VR128X:$src),
7864               (!cast<Instruction>(NAME#SUFF#Zr_Int) VR128X:$src,
7868   let Predicates = [HasAVX512, OptForSize] in {
7869     def : Pat<(_.EltVT (fsqrt (load addr:$src))),
7870               (!cast<Instruction>(NAME#SUFF#Zm)
7871                   (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
7873     def : Pat<(Intr _.ScalarIntMemCPat:$src2),
7874                (!cast<Instruction>(NAME#SUFF#Zm_Int)
7875                    (_.VT (IMPLICIT_DEF)), addr:$src2)>;
// SS/SD scalar sqrt instantiations, plus the VSQRT families (packed,
// packed-with-rounding, and scalar) sharing opcode 0x51.
7880 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
7881   defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS",
7882                         int_x86_sse_sqrt_ss>,
7883                         EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable;
7884   defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD",
7885                         int_x86_sse2_sqrt_sd>,
7886                         EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W,
7890 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">,
7891              avx512_sqrt_packed_all_round<0x51, "vsqrt">;
7893 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
// Scalar VRNDSCALE: round the low element with an immediate control ($src3).
// Forms: r_Int/rb_Int/m_Int intrinsic variants (rb_Int adds {sae}), plus
// codegen-only plain-FR r/m forms with no patterns. The Pat<> blocks lower
// ffloor/fceil/ftrunc/frint/fnearbyint to fixed immediates
// (0x9/0xa/0xb/0x4/0xc); load-folded variants are OptForSize only.
7895 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
7896                                   OpndItins itins, X86VectorVTInfo _> {
7897   let ExeDomain = _.ExeDomain in {
7898   defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7899                            (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7900                            "$src3, $src2, $src1", "$src1, $src2, $src3",
7901                            (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7902                             (i32 imm:$src3))), itins.rr>,
7903                            Sched<[itins.Sched]>;
7905   defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7906                          (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7907                          "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
7908                          (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7909                          (i32 imm:$src3), (i32 FROUND_NO_EXC))), itins.rr>, EVEX_B,
7910                          Sched<[itins.Sched]>;
7912   defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7913                     (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
7915                     "$src3, $src2, $src1", "$src1, $src2, $src3",
7916                     (_.VT (X86RndScales _.RC:$src1,
7917                            _.ScalarIntMemCPat:$src2, (i32 imm:$src3))), itins.rm>,
7918                     Sched<[itins.Sched.Folded, ReadAfterLd]>;
7920   let isCodeGenOnly = 1, hasSideEffects = 0 in {
7921     def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7922               (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
7923               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7924               [], itins.rr>, Sched<[itins.Sched]>;
7927     def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7928               (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
7929               OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
7930               [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
7934   let Predicates = [HasAVX512] in {
7935     def : Pat<(ffloor _.FRC:$src),
7936               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7937                _.FRC:$src, (i32 0x9)))>;
7938     def : Pat<(fceil _.FRC:$src),
7939               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7940                _.FRC:$src, (i32 0xa)))>;
7941     def : Pat<(ftrunc _.FRC:$src),
7942               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7943                _.FRC:$src, (i32 0xb)))>;
7944     def : Pat<(frint _.FRC:$src),
7945               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7946                _.FRC:$src, (i32 0x4)))>;
7947     def : Pat<(fnearbyint _.FRC:$src),
7948               (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
7949                _.FRC:$src, (i32 0xc)))>;
7952   let Predicates = [HasAVX512, OptForSize] in {
7953     def : Pat<(ffloor (_.ScalarLdFrag addr:$src)),
7954               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7955                addr:$src, (i32 0x9)))>;
7956     def : Pat<(fceil (_.ScalarLdFrag addr:$src)),
7957               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7958                addr:$src, (i32 0xa)))>;
7959     def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)),
7960               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7961                addr:$src, (i32 0xb)))>;
7962     def : Pat<(frint (_.ScalarLdFrag addr:$src)),
7963               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7964                addr:$src, (i32 0x4)))>;
7965     def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)),
7966               (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
7967                addr:$src, (i32 0xc)))>;
// Scalar rndscale instantiations for f32 (SS) and f64 (SD).
7971 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S,
7972                                           f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
7974 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S,
7975                                           f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V,
7976                                           EVEX_CD8<64, CD8VT1>;
7978 //-------------------------------------------------
7979 // Integer truncate and extend operations
7980 //-------------------------------------------------
// Shared itineraries for the extend/truncate instruction families below;
// both are scheduled as WriteShuffle256.
7982 let Sched = WriteShuffle256 in
7983 def AVX512_EXTEND : OpndItins<
7984   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
7987 let Sched = WriteShuffle256 in
7988 def AVX512_TRUNCATE : OpndItins<
7989   IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI
// Core truncate forms: a maskable reg-reg (rr) form selected from OpNode,
// and pattern-less store forms mr / mrk (masked); their store behavior is
// supplied by the Pat<>s in avx512_trunc_mr_lowering below.
7992 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7993                               OpndItins itins, X86VectorVTInfo SrcInfo,
7994                               X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
7995   let ExeDomain = DestInfo.ExeDomain in
7996   defm rr  : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
7997                       (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
7998                       (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
7999                       itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>;
8001   let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8002       ExeDomain = DestInfo.ExeDomain in {
8003     def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8004                (ins x86memop:$dst, SrcInfo.RC:$src),
8005                OpcodeStr # "\t{$src, $dst|$dst, $src}",
8006                [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>;
8008     def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8009                (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8010                OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8011                [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>;
8012   }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
// Selection patterns mapping (masked) truncating stores onto the pattern-less
// mr / mrk store instructions defined in avx512_trunc_common.
8015 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8016                                     X86VectorVTInfo DestInfo,
8017                                     PatFrag truncFrag, PatFrag mtruncFrag > {
8019   def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8020             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8021                                 addr:$dst, SrcInfo.RC:$src)>;
8023   def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8024                         (SrcInfo.VT SrcInfo.RC:$src)),
8025             (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8026                                 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Per-width driver: instantiates the common forms + store lowering for
// 128/256-bit (requires HasVLX together with `prd`) and 512-bit (requires
// only `prd`, defaulting to HasAVX512).
8029 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
8030          OpndItins itins, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
8031          X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8032          X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8033          X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
8034          Predicate prd = HasAVX512>{
8036   let Predicates = [HasVLX, prd] in {
8037     defm Z128:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8038                              VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
8039                 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8040                              truncFrag, mtruncFrag>, EVEX_V128;
8042     defm Z256:  avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8043                              VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
8044                 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8045                              truncFrag, mtruncFrag>, EVEX_V256;
8047   let Predicates = [prd] in
8048     defm Z:     avx512_trunc_common<opc, OpcodeStr, OpNode, itins,
8049                              VTSrcInfo.info512, DestInfoZ, x86memopZ>,
8050                 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8051                              truncFrag, mtruncFrag>, EVEX_V512;
// qword -> byte truncation (CD8VO: one-eighth element ratio).
8054 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8055                            OpndItins itins, PatFrag StoreNode,
8056                            PatFrag MaskedStoreNode> {
8057   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8058                v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
8059                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// qword -> word truncation (CD8VQ: one-quarter element ratio).
8062 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8063                            OpndItins itins, PatFrag StoreNode,
8064                            PatFrag MaskedStoreNode> {
8065   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8066                v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
8067                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// qword -> dword truncation (CD8VH: half element ratio).
8070 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8071                            OpndItins itins, PatFrag StoreNode,
8072                            PatFrag MaskedStoreNode> {
8073   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i64_info,
8074                v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
8075                StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// dword -> byte truncation.
8078 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
8079                            OpndItins itins, PatFrag StoreNode,
8080                            PatFrag MaskedStoreNode> {
8081   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8082                v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
8083                StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// dword -> word truncation.
8086 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8087                            OpndItins itins, PatFrag StoreNode,
8088                            PatFrag MaskedStoreNode> {
8089   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i32_info,
8090                v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
8091                StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// word -> byte truncation; requires BWI (passes HasBWI as the predicate).
8094 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8095                            OpndItins itins, PatFrag StoreNode,
8096                            PatFrag MaskedStoreNode> {
8097   defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, itins, avx512vl_i16_info,
8098                v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
8099                StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// VPMOV* instantiations. For each width pair there are three flavors:
// plain truncate (X86vtrunc), signed saturate (X86vtruncs, "s" mnemonic),
// and unsigned saturate (X86vtruncus, "us" mnemonic), each paired with its
// matching (masked) truncating-store PatFrags.
8102 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc, AVX512_TRUNCATE,
8103                                truncstorevi8, masked_truncstorevi8>;
8104 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE,
8105                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
8106 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE,
8107                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
8109 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc, AVX512_TRUNCATE,
8110                                truncstorevi16, masked_truncstorevi16>;
8111 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE,
8112                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
8113 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE,
8114                                  truncstore_us_vi16, masked_truncstore_us_vi16>;
8116 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc, AVX512_TRUNCATE,
8117                                truncstorevi32, masked_truncstorevi32>;
8118 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE,
8119                                 truncstore_s_vi32, masked_truncstore_s_vi32>;
8120 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE,
8121                                  truncstore_us_vi32, masked_truncstore_us_vi32>;
8123 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, AVX512_TRUNCATE,
8124                                truncstorevi8, masked_truncstorevi8>;
8125 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE,
8126                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
8127 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE,
8128                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
8130 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, AVX512_TRUNCATE,
8131                                truncstorevi16, masked_truncstorevi16>;
8132 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE,
8133                                 truncstore_s_vi16, masked_truncstore_s_vi16>;
8134 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE,
8135                                  truncstore_us_vi16, masked_truncstore_us_vi16>;
8137 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, AVX512_TRUNCATE,
8138                                truncstorevi8, masked_truncstorevi8>;
8139 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE,
8140                                 truncstore_s_vi8, masked_truncstore_s_vi8>;
8141 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE,
8142                                  truncstore_us_vi8, masked_truncstore_us_vi8>;
// Without AVX512VL, lower 256-bit truncates by widening the source into a
// 512-bit register (INSERT_SUBREG over IMPLICIT_DEF), using the Z-width
// instruction, and extracting the low xmm of the result.
8144 let Predicates = [HasAVX512, NoVLX] in {
8145 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
8146          (v8i16 (EXTRACT_SUBREG
8147                  (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8148                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
8149 def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
8150          (v4i32 (EXTRACT_SUBREG
8151                  (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8152                                           VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for the word->byte truncate, which needs BWI.
8155 let Predicates = [HasBWI, NoVLX] in {
8156 def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
8157          (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
8158                                             VR256X:$src, sub_ymm))), sub_xmm))>;
// Core sign/zero-extend forms: maskable reg-reg (rr) selected from OpNode,
// and reg-mem (rm) selected from the supplied extending-load fragment.
8161 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins,
8162               X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
8163               X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
8164   let ExeDomain = DestInfo.ExeDomain in {
8165   defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8166                     (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
8167                     (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>,
8168                   EVEX, Sched<[itins.Sched]>;
8170   defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8171                   (ins x86memop:$src), OpcodeStr ,"$src", "$src",
8172                   (DestInfo.VT (LdFrag addr:$src)), itins.rm>,
8173                 EVEX, Sched<[itins.Sched.Folded]>;
// byte -> word extends. Note Z128 uses InVecNode (in-vector extend: only the
// low source elements participate) while Z256/Z ("full-width") use OpNode.
// Requires BWI; 128/256-bit also require VLX.
8177 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
8178               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8179               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8180   let Predicates = [HasVLX, HasBWI] in {
8181     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info,
8182                     v16i8x_info, i64mem, LdFrag, InVecNode>,
8183                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8185     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info,
8186                     v16i8x_info, i128mem, LdFrag, OpNode>,
8187                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8189   let Predicates = [HasBWI] in {
8190     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v32i16_info,
8191                     v32i8x_info, i256mem, LdFrag, OpNode>,
8192                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// byte -> dword extends (same InVecNode-at-128-bit scheme as extend_BW).
8196 multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
8197               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8198               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8199   let Predicates = [HasVLX, HasAVX512] in {
8200     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8201                    v16i8x_info, i32mem, LdFrag, InVecNode>,
8202                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8204     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8205                    v16i8x_info, i64mem, LdFrag, OpNode>,
8206                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8208   let Predicates = [HasAVX512] in {
8209     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8210                    v16i8x_info, i128mem, LdFrag, OpNode>,
8211                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// byte -> qword extends.
8215 multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
8216               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8217               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8218   let Predicates = [HasVLX, HasAVX512] in {
8219     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8220                    v16i8x_info, i16mem, LdFrag, InVecNode>,
8221                          EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
8223     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8224                    v16i8x_info, i32mem, LdFrag, OpNode>,
8225                          EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
8227   let Predicates = [HasAVX512] in {
8228     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8229                    v16i8x_info, i64mem, LdFrag, OpNode>,
8230                          EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// word -> dword extends.
8234 multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
8235               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8236               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8237   let Predicates = [HasVLX, HasAVX512] in {
8238     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info,
8239                    v8i16x_info, i64mem, LdFrag, InVecNode>,
8240                          EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
8242     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info,
8243                    v8i16x_info, i128mem, LdFrag, OpNode>,
8244                          EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
8246   let Predicates = [HasAVX512] in {
8247     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v16i32_info,
8248                    v16i16x_info, i256mem, LdFrag, OpNode>,
8249                          EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// word -> qword extends.
8253 multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
8254               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8255               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8256   let Predicates = [HasVLX, HasAVX512] in {
8257     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8258                    v8i16x_info, i32mem, LdFrag, InVecNode>,
8259                          EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
8261     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8262                    v8i16x_info, i64mem, LdFrag, OpNode>,
8263                          EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
8265   let Predicates = [HasAVX512] in {
8266     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8267                    v8i16x_info, i128mem, LdFrag, OpNode>,
8268                          EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// dword -> qword extends (no VEX_WIG here, unlike the other extend_*).
8272 multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
8273               SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy,
8274               OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
8276   let Predicates = [HasVLX, HasAVX512] in {
8277     defm Z128:  avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info,
8278                    v4i32x_info, i64mem, LdFrag, InVecNode>,
8279                          EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8281     defm Z256:  avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info,
8282                    v4i32x_info, i128mem, LdFrag, OpNode>,
8283                          EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8285   let Predicates = [HasAVX512] in {
8286     defm Z   :  avx512_extend_common<opc, OpcodeStr, itins, v8i64_info,
8287                    v8i32x_info, i256mem, LdFrag, OpNode>,
8288                          EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extend ("z" load-fragment prefix) and sign-extend ("s") families.
8292 defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8293 defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8294 defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8295 defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8296 defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8297 defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>;
8299 defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8300 defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8301 defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8302 defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8303 defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
8304 defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>;
// Load-folding patterns for the VPMOVSX*/VPMOVZX* family: match an extend
// of a loaded (and possibly zero-extended/bitcast) source vector and select
// the memory form of the corresponding instruction.
//  - OpcPrefix: instruction name prefix ("VPMOVSX" or "VPMOVZX" at the
//    instantiations below) that the pattern results are !cast from.
//  - ExtOp:    extend SDNode applied to a whole source vector.
//  - InVecOp:  extend SDNode that consumes only the low elements of the
//    source vector (result is narrower than a full extend of the source).
//  - ExtLoad16: 16-bit load fragment for the byte->qword 128-bit case,
//    where only two source bytes are read.
8307 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8308 SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit forms.  Byte->word additionally requires BWI since the v8i16
// destination instructions are only defined with HasBWI.
8310 let Predicates = [HasVLX, HasBWI] in {
8311 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8312 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8313 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8314 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8315 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8316 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8317 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8318 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8319 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8320 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// Remaining 128-bit forms only need VLX.
8322 let Predicates = [HasVLX] in {
8323 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8324 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8325 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8326 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8327 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8328 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8329 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8330 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
// Byte->qword reads only 16 bits, hence the ExtLoad16 fragment.
8332 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
8333 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8334 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8335 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8336 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8337 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8338 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8339 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8341 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8342 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8343 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8344 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8345 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8346 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8347 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8348 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8349 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8350 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8352 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8353 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8354 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
8355 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8356 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8357 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8358 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8359 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8361 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8362 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8363 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8364 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8365 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8366 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8367 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
8368 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8369 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
8370 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit forms.  These extend the full source vector, so ExtOp is used
// instead of InVecOp.
8373 let Predicates = [HasVLX, HasBWI] in {
8374 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8375 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8376 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8377 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8378 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8379 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8381 let Predicates = [HasVLX] in {
8382 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8383 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8384 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8385 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8386 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8387 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8388 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8389 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8391 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8392 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8393 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8394 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8395 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8396 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8397 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8398 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8400 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8401 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8402 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8403 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8404 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8405 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8407 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8408 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8409 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8410 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8411 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8412 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8413 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8414 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8416 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8417 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8418 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8419 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8420 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8421 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit byte->word form requires BWI only (no VLX at 512 bits).
8424 let Predicates = [HasBWI] in {
8425 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8426 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
// Remaining 512-bit forms are available with baseline AVX-512F.
8428 let Predicates = [HasAVX512] in {
8429 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8430 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8432 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8433 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8434 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8435 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8437 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8438 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8440 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8441 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8443 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8444 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Instantiate the load-folding patterns for both the sign- and
// zero-extending instruction families.
8448 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8449 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
8451 //===----------------------------------------------------------------------===//
8452 // GATHER - SCATTER Operations
8454 // FIXME: Improve scheduling of gather/scatter instructions.
// Base multiclass for one gather instruction: produces the gathered vector
// and writes back the (consumed) mask.  $src1 provides the pass-through
// elements for masked-off lanes, and the mask register is tied to the
// $mask_wb output.  MaskRC defaults to the write-mask class matching the
// element count but can be overridden (e.g. VK2WM for the 2-element case).
8455 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8456 X86MemOperand memop, PatFrag GatherNode,
8457 RegisterClass MaskRC = _.KRCWM> {
// @earlyclobber: the destination must not alias the index register;
// $src1/$mask are tied to their respective outputs.
8458 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8459 ExeDomain = _.ExeDomain in
8460 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
8461 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
8462 !strconcat(OpcodeStr#_.Suffix,
8463 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
8464 [(set _.RC:$dst, MaskRC:$mask_wb,
8465 (GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
8466 vectoraddr:$src2))]>, EVEX, EVEX_K,
8467 EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers of 64-bit elements (VEX_W set on every variant).  Emits the
// dword-indexed (D) and qword-indexed (Q) forms at 512/256/128 bits; the
// sub-512-bit forms are gated on VLX.
8470 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8471 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8472 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
8473 vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
8474 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
8475 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
8476 let Predicates = [HasVLX] in {
8477 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8478 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
8479 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
8480 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
8481 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8482 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
8483 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8484 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Gathers of 32-bit elements.  Note the qword-indexed (Q) variants use a
// result vector half the width of the index vector (e.g. the 512-bit-index
// QZ form yields a 256-bit result via _.info256), and the 128-bit Q form
// overrides the mask class to VK2WM for its two elements.
8488 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8489 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8490 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
8491 mgatherv16i32>, EVEX_V512;
8492 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8493 mgatherv8i64>, EVEX_V512;
8494 let Predicates = [HasVLX] in {
8495 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8496 vy256xmem, mgatherv8i32>, EVEX_V256;
8497 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8498 vy128xmem, mgatherv4i64>, EVEX_V256;
8499 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8500 vx128xmem, mgatherv4i32>, EVEX_V128;
8501 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8502 vx64xmem, mgatherv2i64, VK2WM>,
// FP gathers (VGATHERDPD/QPD/DPS/QPS) and integer gathers
// (VPGATHERDQ/QQ/DD/QD).
8508 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8509 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8511 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8512 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Base multiclass for one scatter instruction: stores vector elements under
// mask control; the mask is consumed and written back via the tied
// $mask_wb output.
8514 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8515 X86MemOperand memop, PatFrag ScatterNode> {
8517 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
8519 def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
8520 (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
8521 !strconcat(OpcodeStr#_.Suffix,
8522 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
8523 [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8524 _.KRCWM:$mask, vectoraddr:$dst))]>,
8525 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8526 Sched<[WriteStore]>;
// Scatters of 64-bit elements (VEX_W on every variant); mirrors
// avx512_gather_q_pd for the store direction.  Sub-512-bit forms need VLX.
8529 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8530 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8531 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
8532 vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
8533 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
8534 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
8535 let Predicates = [HasVLX] in {
8536 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8537 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
8538 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
8539 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
8540 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8541 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
8542 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8543 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Scatters of 32-bit elements; mirrors avx512_gather_d_ps, including the
// narrower source vector for the qword-indexed (Q) variants.
8547 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8548 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8549 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
8550 mscatterv16i32>, EVEX_V512;
8551 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8552 mscatterv8i64>, EVEX_V512;
8553 let Predicates = [HasVLX] in {
8554 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8555 vy256xmem, mscatterv8i32>, EVEX_V256;
8556 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8557 vy128xmem, mscatterv4i64>, EVEX_V256;
8558 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8559 vx128xmem, mscatterv4i32>, EVEX_V128;
8560 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8561 vx64xmem, mscatterv2i64>, EVEX_V128;
// FP scatters (VSCATTERDPD/QPD/DPS/QPS) and integer scatters
// (VPSCATTERDQ/QQ/DD/QD).
8565 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8566 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
8568 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8569 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Gather/scatter prefetch instructions (AVX-512 PF extension).  These have
// no DAG pattern (selected only via intrinsics/manual lowering), so the
// pattern list is empty and hasSideEffects is set to keep them from being
// dropped as dead.
8572 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8573 RegisterClass KRC, X86MemOperand memop> {
8574 let Predicates = [HasPFI], hasSideEffects = 1 in
8575 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
8576 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
8577 [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
// Instantiations of the prefetch gathers/scatters: hint 0/1 x gather/scatter
// x {dword,qword} index x {ps,pd} element size.  The ModRM reg field (MRM1m,
// MRM2m, MRM5m, MRM6m) selects the operation for the shared 0xC6/0xC7
// opcodes.
8580 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
8581 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8583 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
8584 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8586 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
8587 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8589 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
8590 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8592 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
8593 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8595 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
8596 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8598 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
8599 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8601 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
8602 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8604 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
8605 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8607 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
8608 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8610 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
8611 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8613 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
8614 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8616 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
8617 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8619 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
8620 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8622 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
8623 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8625 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
8626 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// One mask->vector conversion (VPMOVM2*) at a fixed vector width: each bit
// of the mask register is sign-extended into the corresponding vector
// element (matched as X86vsext of the mask register).
8628 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
8629 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
8630 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
8631 [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))],
8632 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
// Expand cvt_by_vec_width over the three vector widths for one element
// type: 512-bit needs only the feature predicate, 128/256-bit also need VLX.
8635 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8636 string OpcodeStr, Predicate prd> {
8637 let Predicates = [prd] in
8638 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8640 let Predicates = [prd, HasVLX] in {
8641 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8642 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// VPMOVM2{B,W} require BWI; VPMOVM2{D,Q} require DQI.  The W/Q variants set
// VEX_W to select the wider element size for the shared opcodes.
8646 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8647 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8648 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8649 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// One vector->mask conversion (VPMOV*2M) at a fixed vector width, matched
// from the X86cvt2mask node.
8651 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
8652 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8653 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8654 [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src))),
8655 IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
8658 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen the narrow source into an undef 512-bit register (INSERT_SUBREG),
// run the Zrr instruction, then copy the result into the narrow mask class.
8659 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
8660 X86VectorVTInfo _> {
8662 def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
8663 (_.KVT (COPY_TO_REGCLASS
8664 (!cast<Instruction>(NAME#"Zrr")
8665 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
8666 _.RC:$src, _.SubRegIdx)),
// Expand vector->mask over all widths: native instructions at 512-bit (and
// at 128/256 with VLX), plus the widening lowering patterns when VLX is
// absent.
8670 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
8671 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8672 let Predicates = [prd] in
8673 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8676 let Predicates = [prd, HasVLX] in {
8677 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
8679 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
// Without VLX, emulate the narrow forms via the 512-bit instruction.
8682 let Predicates = [prd, NoVLX] in {
8683 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8684 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
// VPMOV{B,W}2M require BWI; VPMOV{D,Q}2M require DQI.  VEX_W distinguishes
// the wider element size for each shared opcode.
8688 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8689 avx512vl_i8_info, HasBWI>;
8690 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8691 avx512vl_i16_info, HasBWI>, VEX_W;
8692 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8693 avx512vl_i32_info, HasDQI>;
8694 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8695 avx512vl_i64_info, HasDQI>, VEX_W;
8697 //===----------------------------------------------------------------------===//
8698 // AVX-512 - COMPRESS and EXPAND
8701 // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND?
// Shared itineraries for the compress/expand instructions; both currently
// reuse the integer-ALU itinerary classes with the WriteShuffle256
// scheduling resource.
8702 let Sched = WriteShuffle256 in {
8703 def AVX512_COMPRESS : OpndItins<
8704 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
8706 def AVX512_EXPAND : OpndItins<
8707 IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
// One compress instruction at a fixed width: maskable register form plus
// plain and masked store forms.  The store forms carry no patterns (the
// masked store is selected via the lowering multiclass below).
8711 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
8712 string OpcodeStr, OpndItins itins> {
8713 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
8714 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8715 (_.VT (X86compress _.RC:$src1)), itins.rr>, AVX5128IBase,
8716 Sched<[itins.Sched]>;
// Pattern-less store form; mayStore/hasSideEffects set explicitly since
// there is no pattern to infer them from.
8718 let mayStore = 1, hasSideEffects = 0 in
8719 def mr : AVX5128I<opc, MRMDestMem, (outs),
8720 (ins _.MemOp:$dst, _.RC:$src),
8721 OpcodeStr # "\t{$src, $dst|$dst, $src}",
8722 []>, EVEX_CD8<_.EltSize, CD8VT1>,
8723 Sched<[itins.Sched.Folded]>;
// Masked store form (EVEX_K), also pattern-less.
8725 def mrk : AVX5128I<opc, MRMDestMem, (outs),
8726 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
8727 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8729 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
8730 Sched<[itins.Sched.Folded]>;
// Select the masked-store (mrk) form for a masked compressing store node.
8733 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
8734 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8736 (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8737 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
// Expand one compress instruction over 512/256/128-bit widths; the narrow
// forms additionally require VLX.
8740 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
8742 AVX512VLVectorVTInfo VTInfo,
8743 Predicate Pred = HasAVX512> {
8744 let Predicates = [Pred] in
8745 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>,
8746 compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8748 let Predicates = [Pred, HasVLX] in {
8749 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>,
8750 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8751 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>,
8752 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
// Integer (VPCOMPRESSD/Q) and FP (VCOMPRESSPS/PD) compress instructions;
// VEX_W marks the 64-bit-element variants.
8756 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS,
8757 avx512vl_i32_info>, EVEX;
8758 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS,
8759 avx512vl_i64_info>, EVEX, VEX_W;
8760 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS,
8761 avx512vl_f32_info>, EVEX;
8762 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS,
8763 avx512vl_f64_info>, EVEX, VEX_W;
// One expand instruction at a fixed width: maskable register and load
// forms, matching X86expand of a register or a (bitcast) loaded vector.
8766 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
8767 string OpcodeStr, OpndItins itins> {
8768 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8769 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8770 (_.VT (X86expand _.RC:$src1)), itins.rr>, AVX5128IBase,
8771 Sched<[itins.Sched]>;
8773 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8774 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8775 (_.VT (X86expand (_.VT (bitconvert
8776 (_.LdFrag addr:$src1))))), itins.rm>,
8777 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
8778 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Select the zero-masked (rmkz) or merge-masked (rmk) load form for a
// masked expanding load, depending on whether the pass-through is undef.
8781 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8783 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8784 (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8785 _.KRCWM:$mask, addr:$src)>;
8787 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8788 (_.VT _.RC:$src0))),
8789 (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8790 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
// Expand one expand instruction over 512/256/128-bit widths; the narrow
// forms additionally require VLX.
8793 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
8795 AVX512VLVectorVTInfo VTInfo,
8796 Predicate Pred = HasAVX512> {
8797 let Predicates = [Pred] in
8798 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>,
8799 expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8801 let Predicates = [Pred, HasVLX] in {
8802 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>,
8803 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8804 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>,
8805 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
// Integer (VPEXPANDD/Q) and FP (VEXPANDPS/PD) expand instructions; VEX_W
// marks the 64-bit-element variants.
8809 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND,
8810 avx512vl_i32_info>, EVEX;
8811 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND,
8812 avx512vl_i64_info>, EVEX, VEX_W;
8813 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND,
8814 avx512vl_f32_info>, EVEX;
8815 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND,
8816 avx512vl_f64_info>, EVEX, VEX_W;
8818 //handle instruction reg_vec1 = op(reg_vec,imm)
8820 // op(broadcast(eltVt),imm)
8821 //all instruction created with FROUND_CURRENT
// Unary packed-FP-with-immediate forms: register, full-vector memory, and
// element-broadcast memory (EVEX_B) variants, all maskable.
8822 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8823 OpndItins itins, X86VectorVTInfo _> {
8824 let ExeDomain = _.ExeDomain in {
8825 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8826 (ins _.RC:$src1, i32u8imm:$src2),
8827 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8828 (OpNode (_.VT _.RC:$src1),
8829 (i32 imm:$src2)), itins.rr>, Sched<[itins.Sched]>;
8830 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8831 (ins _.MemOp:$src1, i32u8imm:$src2),
8832 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8833 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
8834 (i32 imm:$src2)), itins.rm>,
8835 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8836 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8837 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
8838 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
8839 "${src1}"##_.BroadcastStr##", $src2",
8840 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
8841 (i32 imm:$src2)), itins.rm>, EVEX_B,
8842 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8846 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Unary packed-FP-with-immediate SAE (suppress-all-exceptions) form:
// register-only variant with EVEX_B selecting {sae}, built with
// FROUND_NO_EXC.
8847 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8848 SDNode OpNode, OpndItins itins,
8849 X86VectorVTInfo _> {
8850 let ExeDomain = _.ExeDomain in
8851 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8852 (ins _.RC:$src1, i32u8imm:$src2),
8853 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
8854 "$src1, {sae}, $src2",
8855 (OpNode (_.VT _.RC:$src1),
8857 (i32 FROUND_NO_EXC)), itins.rr>,
8858 EVEX_B, Sched<[itins.Sched]>;
// Combine the unary packed-imm and SAE variants over all widths: 512-bit
// gets both (SAE only exists at 512), 128/256-bit need VLX and get only the
// non-SAE forms.
8861 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
8862 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
8863 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
8864 let Predicates = [prd] in {
8865 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8867 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd,
8868 itins, _.info512>, EVEX_V512;
8870 let Predicates = [prd, HasVLX] in {
8871 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8872 _.info128>, EVEX_V128;
8873 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins,
8874 _.info256>, EVEX_V256;
8878 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
8879 // op(reg_vec2,mem_vec,imm)
8880 // op(reg_vec2,broadcast(eltVt),imm)
8881 //all instruction created with FROUND_CURRENT
// Binary packed-FP-with-immediate forms: reg/reg, reg/mem, and
// reg/broadcast-mem (EVEX_B) variants, all maskable.
8882 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8883 OpndItins itins, X86VectorVTInfo _>{
8884 let ExeDomain = _.ExeDomain in {
8885 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8886 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8887 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8888 (OpNode (_.VT _.RC:$src1),
8890 (i32 imm:$src3)), itins.rr>,
8891 Sched<[itins.Sched]>;
8892 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8893 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
8894 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8895 (OpNode (_.VT _.RC:$src1),
8896 (_.VT (bitconvert (_.LdFrag addr:$src2))),
8897 (i32 imm:$src3)), itins.rm>,
8898 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8899 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8900 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8901 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8902 "$src1, ${src2}"##_.BroadcastStr##", $src3",
8903 (OpNode (_.VT _.RC:$src1),
8904 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8905 (i32 imm:$src3)), itins.rm>, EVEX_B,
8906 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8910 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
8911 // op(reg_vec2,mem_vec,imm)
// Three-operand (two sources + i8 immediate) forms with separate source
// and destination type infos; reg/reg and reg/mem variants, maskable.
8912 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
8913 OpndItins itins, X86VectorVTInfo DestInfo,
8914 X86VectorVTInfo SrcInfo>{
8915 let ExeDomain = DestInfo.ExeDomain in {
8916 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8917 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
8918 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8919 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8920 (SrcInfo.VT SrcInfo.RC:$src2),
8921 (i8 imm:$src3))), itins.rr>,
8922 Sched<[itins.Sched]>;
8923 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8924 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
8925 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8926 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8927 (SrcInfo.VT (bitconvert
8928 (SrcInfo.LdFrag addr:$src2))),
8929 (i8 imm:$src3))), itins.rm>,
8930 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8934 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
8935 // op(reg_vec2,mem_vec,imm)
8936 // op(reg_vec2,broadcast(eltVt),imm)
// Same-type three-operand form: inherits reg/reg and reg/mem from
// avx512_3Op_rm_imm8 (with Src == Dest info) and adds the broadcast-memory
// (EVEX_B) variant.
8937 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
8938 OpndItins itins, X86VectorVTInfo _>:
8939 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
8941 let ExeDomain = _.ExeDomain in
8942 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8943 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
8944 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8945 "$src1, ${src2}"##_.BroadcastStr##", $src3",
8946 (OpNode (_.VT _.RC:$src1),
8947 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8948 (i8 imm:$src3)), itins.rm>, EVEX_B,
8949 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8952 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
8953 // op(reg_vec2,mem_scalar,imm)
// Scalar-FP-with-immediate forms: reg/reg and reg/scalar-mem (the loaded
// scalar is placed into a vector via scalar_to_vector), maskable.
8954 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8955 OpndItins itins, X86VectorVTInfo _> {
8956 let ExeDomain = _.ExeDomain in {
8957 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8958 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8959 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8960 (OpNode (_.VT _.RC:$src1),
8962 (i32 imm:$src3)), itins.rr>,
8963 Sched<[itins.Sched]>;
8964 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8965 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8966 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8967 (OpNode (_.VT _.RC:$src1),
8968 (_.VT (scalar_to_vector
8969 (_.ScalarLdFrag addr:$src2))),
8970 (i32 imm:$src3)), itins.rm>,
8971 Sched<[itins.Sched.Folded, ReadAfterLd]>;
8975 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Binary packed-FP-with-immediate SAE form: register-only, EVEX_B selects
// {sae}, built with FROUND_NO_EXC.
8976 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8977 SDNode OpNode, OpndItins itins,
8978 X86VectorVTInfo _> {
8979 let ExeDomain = _.ExeDomain in
8980 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8981 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8982 OpcodeStr, "$src3, {sae}, $src2, $src1",
8983 "$src1, $src2, {sae}, $src3",
8984 (OpNode (_.VT _.RC:$src1),
8987 (i32 FROUND_NO_EXC)), itins.rr>,
8988 EVEX_B, Sched<[itins.Sched]>;
8991 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar-FP-with-immediate SAE form: register-only, EVEX_B selects {sae},
// built with FROUND_NO_EXC.
8992 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8993 OpndItins itins, X86VectorVTInfo _> {
8994 let ExeDomain = _.ExeDomain in
8995 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8996 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8997 OpcodeStr, "$src3, {sae}, $src2, $src1",
8998 "$src1, $src2, {sae}, $src3",
8999 (OpNode (_.VT _.RC:$src1),
9002 (i32 FROUND_NO_EXC)), itins.rr>,
9003 EVEX_B, Sched<[itins.Sched]>;
// Combine the binary packed-imm and SAE variants over all widths: 512-bit
// gets both, 128/256-bit need VLX and get only the non-SAE forms.
9006 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
9007 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
9008 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9009 let Predicates = [prd] in {
9010 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>,
9011 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>,
9015 let Predicates = [prd, HasVLX] in {
9016 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>,
9018 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>,
// Expand avx512_3Op_rm_imm8 (distinct src/dest infos) over all widths;
// default predicate HasBWI, narrow forms additionally need VLX.
9023 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
9024 OpndItins itins, AVX512VLVectorVTInfo DestInfo,
9025 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
9026 let Predicates = [Pred] in {
9027 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
9028 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9030 let Predicates = [Pred, HasVLX] in {
9031 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
9032 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
9033 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
9034 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Expand avx512_3Op_imm8 (same-type, incl. broadcast form) over all widths;
// default predicate HasAVX512, narrow forms additionally need VLX.
9038 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
9039 bits<8> opc, SDNode OpNode, OpndItins itins,
9040 Predicate Pred = HasAVX512> {
9041 let Predicates = [Pred] in {
9042 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
9044 let Predicates = [Pred, HasVLX] in {
9045 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
9046 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
// Scalar combination: the non-SAE and SAE scalar-imm variants under one
// predicate.  Scalar ops use the 128-bit (Z128) naming.
9050 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
9051 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
9052 SDNode OpNodeRnd, OpndItins itins, Predicate prd>{
9053 let Predicates = [prd] in {
9054 defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>,
9055 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>;
// Instantiate the unary packed-imm family for both element sizes: PS
// (f32, CD8VF tuple at 32-bit granularity) and PD (f64, VEX_W).
9059 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
9060 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
9061 SDNode OpNodeRnd, SizeItins itins, Predicate prd>{
9062 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
9063 opcPs, OpNode, OpNodeRnd, itins.s, prd>,
9064 EVEX_CD8<32, CD8VF>;
9065 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
9066 opcPd, OpNode, OpNodeRnd, itins.d, prd>,
9067 EVEX_CD8<64, CD8VF>, VEX_W;
// Packed unary imm8 ops: VREDUCE (DQI), VRNDSCALE and VGETMANT (AVX512F).
9070 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
9071 X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>,
9072 AVX512AIi8Base, EVEX;
9073 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
9074 X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>,
9075 AVX512AIi8Base, EVEX;
9076 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
9077 X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>,
9078 AVX512AIi8Base, EVEX;
// Packed two-source imm8 range op (DQI only).
9080 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
9081 0x50, X86VRange, X86VRangeRnd,
9082 SSE_ALU_F64P, HasDQI>,
9083 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9084 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
9085 0x50, X86VRange, X86VRangeRnd,
9086 SSE_ALU_F32P, HasDQI>,
9087 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar imm8 forms; all use VEX_LIG and the CD8VT1 tuple (one element).
9089 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
9090 f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>,
9091 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9092 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
9093 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>,
9094 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9096 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
9097 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>,
9098 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9099 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
9100 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>,
9101 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
9103 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
9104 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>,
9105 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9106 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9107 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>,
9108 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Lower generic FP rounding nodes on 512-bit vectors to VRNDSCALE with a
// fixed imm8. Per the VRNDSCALE imm8 encoding: bits[1:0] = rounding mode
// (1 = down, 2 = up, 3 = truncate), bit 2 = use MXCSR rounding mode,
// bit 3 = suppress precision (inexact) exceptions. Hence:
//   0x9 floor, 0xA ceil, 0xB trunc, 0x4 rint (MXCSR, exceptions allowed),
//   0xC nearbyint (MXCSR, inexact suppressed).
9110 let Predicates = [HasAVX512] in {
9111 def : Pat<(v16f32 (ffloor VR512:$src)),
9112 (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9113 def : Pat<(v16f32 (fnearbyint VR512:$src)),
9114 (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9115 def : Pat<(v16f32 (fceil VR512:$src)),
9116 (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9117 def : Pat<(v16f32 (frint VR512:$src)),
9118 (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9119 def : Pat<(v16f32 (ftrunc VR512:$src)),
9120 (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
9122 def : Pat<(v8f64 (ffloor VR512:$src)),
9123 (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
9124 def : Pat<(v8f64 (fnearbyint VR512:$src)),
9125 (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9126 def : Pat<(v8f64 (fceil VR512:$src)),
9127 (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
9128 def : Pat<(v8f64 (frint VR512:$src)),
9129 (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9130 def : Pat<(v8f64 (ftrunc VR512:$src)),
9131 (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
// Same rounding-node -> VRNDSCALE lowering for the 128/256-bit (VLX) forms;
// imm8 values as in the 512-bit patterns above (0x9 floor, 0xC nearbyint,
// 0xA ceil, 0x4 rint, 0xB trunc).
9134 let Predicates = [HasVLX] in {
9135 def : Pat<(v4f32 (ffloor VR128X:$src)),
9136 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
9137 def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9138 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9139 def : Pat<(v4f32 (fceil VR128X:$src)),
9140 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
9141 def : Pat<(v4f32 (frint VR128X:$src)),
9142 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9143 def : Pat<(v4f32 (ftrunc VR128X:$src)),
9144 (VRNDSCALEPSZ128rri VR128X:$src, (i32 0xB))>;
9146 def : Pat<(v2f64 (ffloor VR128X:$src)),
9147 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
9148 def : Pat<(v2f64 (fnearbyint VR128X:$src)),
9149 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
9150 def : Pat<(v2f64 (fceil VR128X:$src)),
9151 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
9152 def : Pat<(v2f64 (frint VR128X:$src)),
9153 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
9154 def : Pat<(v2f64 (ftrunc VR128X:$src)),
9155 (VRNDSCALEPDZ128rri VR128X:$src, (i32 0xB))>;
9157 def : Pat<(v8f32 (ffloor VR256X:$src)),
9158 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
9159 def : Pat<(v8f32 (fnearbyint VR256X:$src)),
9160 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
9161 def : Pat<(v8f32 (fceil VR256X:$src)),
9162 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
9163 def : Pat<(v8f32 (frint VR256X:$src)),
9164 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
9165 def : Pat<(v8f32 (ftrunc VR256X:$src)),
9166 (VRNDSCALEPSZ256rri VR256X:$src, (i32 0xB))>;
9168 def : Pat<(v4f64 (ffloor VR256X:$src)),
9169 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
9170 def : Pat<(v4f64 (fnearbyint VR256X:$src)),
9171 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
9172 def : Pat<(v4f64 (fceil VR256X:$src)),
9173 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
9174 def : Pat<(v4f64 (frint VR256X:$src)),
9175 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
9176 def : Pat<(v4f64 (ftrunc VR256X:$src)),
9177 (VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
// 128-bit-lane shuffles (VSHUFF/I 32X4/64X2). Only Z (512) and Z256 forms:
// a 128-bit vector has a single lane, so no Z128 variant exists.
9180 multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
9181 AVX512VLVectorVTInfo _, bits<8> opc>{
9182 let Predicates = [HasAVX512] in {
9183 defm Z : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
9186 let Predicates = [HasAVX512, HasVLX] in {
9187 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
// FP (0x23) and integer (0x43) 128-bit-lane shuffle instantiations; the
// 64-bit-element forms carry VEX_W.
9191 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
9192 avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9193 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
9194 avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9195 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
9196 avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9197 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
9198 avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Fallback: implement a 128->512 subvector broadcast as a lane shuffle of the
// source inserted (via INSERT_SUBREG into an IMPLICIT_DEF) at sub_xmm.
// i16/i8 element types reuse the 32x4 integer shuffle since no finer-grained
// lane shuffle exists.
9200 let Predicates = [HasAVX512] in {
9201 // Provide fallback in case the load node that is used in the broadcast
9202 // patterns above is used by additional users, which prevents the pattern
9204 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9205 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9206 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9208 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9209 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9210 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9213 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9214 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9215 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9217 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9218 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9219 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9222 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9223 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9224 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9227 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9228 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9229 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGND/VALIGNQ: element-granular two-source align, opcode 0x03,
// expressed via the common 3-op + imm8 helper.
9233 multiclass avx512_valign<string OpcodeStr, OpndItins itins,
9234 AVX512VLVectorVTInfo VTInfo_I> {
9235 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
9236 AVX512AIi8Base, EVEX_4V;
// Dword/qword element aligns, plus byte-granular VPALIGNR (BWI, the default
// predicate of avx512_common_3Op_rm_imm8).
9239 defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
9240 EVEX_CD8<32, CD8VF>;
9241 defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
9242 EVEX_CD8<64, CD8VF>, VEX_W;
9244 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
9245 avx512vl_i8_info, avx512vl_i8_info>,
9248 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Scale the element-shift immediate when retargeting to a smaller element:
// qword->dword multiplies by 2; qword->byte by 8; dword->byte by 4.
9250 def ValignqImm32XForm : SDNodeXForm<imm, [{
9251 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
9253 def ValignqImm8XForm : SDNodeXForm<imm, [{
9254 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
9256 def ValigndImm8XForm : SDNodeXForm<imm, [{
9257 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrite a masked align on the From type as the To-type instruction, scaling
// the immediate with ImmXForm. Covers reg-reg and reg-mem, each in merge
// ("rrik"/"rmik", with $src0 passthrough) and zeroing ("rrikz"/"rmikz") forms.
9260 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
9261 X86VectorVTInfo From, X86VectorVTInfo To,
9262 SDNodeXForm ImmXForm> {
9263 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9265 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9268 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
9269 To.RC:$src1, To.RC:$src2,
9270 (ImmXForm imm:$src3))>;
9272 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9274 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
9277 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
9278 To.RC:$src1, To.RC:$src2,
9279 (ImmXForm imm:$src3))>;
9281 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9283 (From.VT (OpNode From.RC:$src1,
9284 (bitconvert (To.LdFrag addr:$src2)),
9287 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
9288 To.RC:$src1, addr:$src2,
9289 (ImmXForm imm:$src3))>;
9291 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9293 (From.VT (OpNode From.RC:$src1,
9294 (bitconvert (To.LdFrag addr:$src2)),
9297 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
9298 To.RC:$src1, addr:$src2,
9299 (ImmXForm imm:$src3))>;
// Extends avx512_vpalign_mask_lowering with broadcast-memory ("rmbi"/"rmbik"/
// "rmbikz") patterns: the To-type scalar load broadcast feeding the From-type
// align, in unmasked, merge-masked, and zero-masked forms.
9302 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
9303 X86VectorVTInfo From,
9305 SDNodeXForm ImmXForm> :
9306 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
9307 def : Pat<(From.VT (OpNode From.RC:$src1,
9308 (bitconvert (To.VT (X86VBroadcast
9309 (To.ScalarLdFrag addr:$src2)))),
9311 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
9312 (ImmXForm imm:$src3))>;
9314 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9316 (From.VT (OpNode From.RC:$src1,
9318 (To.VT (X86VBroadcast
9319 (To.ScalarLdFrag addr:$src2)))),
9322 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
9323 To.RC:$src1, addr:$src2,
9324 (ImmXForm imm:$src3))>;
9326 def : Pat<(To.VT (vselect To.KRCWM:$mask,
9328 (From.VT (OpNode From.RC:$src1,
9330 (To.VT (X86VBroadcast
9331 (To.ScalarLdFrag addr:$src2)))),
9334 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
9335 To.RC:$src1, addr:$src2,
9336 (ImmXForm imm:$src3))>;
9339 let Predicates = [HasAVX512] in {
9340 // For 512-bit we lower to the widest element type we can. So we only need
9341 // to handle converting valignq to valignd.
9342 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
9343 v16i32_info, ValignqImm32XForm>;
9346 let Predicates = [HasVLX] in {
9347 // For 128-bit we lower to the widest element type we can. So we only need
9348 // to handle converting valignq to valignd.
9349 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
9350 v4i32x_info, ValignqImm32XForm>;
9351 // For 256-bit we lower to the widest element type we can. So we only need
9352 // to handle converting valignq to valignd.
9353 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
9354 v8i32x_info, ValignqImm32XForm>;
9357 let Predicates = [HasVLX, HasBWI] in {
9358 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
// Byte-granular VPALIGNR needs BWI; the immediate is rescaled to bytes
// (x8 from qwords, x4 from dwords).
9359 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
9360 v16i8x_info, ValignqImm8XForm>;
9361 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
9362 v16i8x_info, ValigndImm8XForm>;
// VDBPSADBW (0x42): i8 sources producing i16 results, hence the two distinct
// type-info bundles; BWI-gated via the multiclass default predicate.
9365 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
9366 SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
// Maskable unary op: register (rr) and full-vector memory (rm) forms for one
// vector width.
9369 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9370 OpndItins itins, X86VectorVTInfo _> {
9371 let ExeDomain = _.ExeDomain in {
9372 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9373 (ins _.RC:$src1), OpcodeStr,
9375 (_.VT (OpNode _.RC:$src1)), itins.rr>, EVEX, AVX5128IBase,
9376 Sched<[itins.Sched]>;
9378 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9379 (ins _.MemOp:$src1), OpcodeStr,
9381 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1)))), itins.rm>,
9382 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
9383 Sched<[itins.Sched.Folded]>;
// avx512_unary_rm plus a broadcast-memory (rmb, EVEX.b) form that applies the
// op to a broadcast scalar load.
9387 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9388 OpndItins itins, X86VectorVTInfo _> :
9389 avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> {
9390 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9391 (ins _.ScalarMemOp:$src1), OpcodeStr,
9392 "${src1}"##_.BroadcastStr,
9393 "${src1}"##_.BroadcastStr,
9394 (_.VT (OpNode (X86VBroadcast
9395 (_.ScalarLdFrag addr:$src1)))), itins.rm>,
9396 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
9397 Sched<[itins.Sched.Folded]>;
// Instantiate avx512_unary_rm at 512 (under prd) and 256/128 (under
// prd + HasVLX).
9400 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9401 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9403 let Predicates = [prd] in
9404 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9407 let Predicates = [prd, HasVLX] in {
9408 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9410 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
// Same as avx512_unary_rm_vl but using the broadcast-capable rmb variant.
9415 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9416 OpndItins itins, AVX512VLVectorVTInfo VTInfo,
9418 let Predicates = [prd] in
9419 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>,
9422 let Predicates = [prd, HasVLX] in {
9423 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>,
9425 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>,
// D/Q element-width pair (with broadcast forms); the Q flavor carries VEX_W.
9430 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
9431 SDNode OpNode, OpndItins itins, Predicate prd> {
9432 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins,
9433 avx512vl_i64_info, prd>, VEX_W;
9434 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins,
9435 avx512vl_i32_info, prd>;
// B/W element-width pair; no broadcast forms (byte/word elements cannot be
// EVEX-embedded-broadcast), hence avx512_unary_rm_vl rather than _rmb_vl.
9438 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
9439 SDNode OpNode, OpndItins itins, Predicate prd> {
9440 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins,
9441 avx512vl_i16_info, prd>, VEX_WIG;
9442 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins,
9443 avx512vl_i8_info, prd>, VEX_WIG;
// All four element widths (B/W/D/Q) of a unary op in one multiclass.
9446 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9447 bits<8> opc_d, bits<8> opc_q,
9448 string OpcodeStr, SDNode OpNode,
9450 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins,
9452 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins,
9456 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>;
9458 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
// Widen into a ZMM (INSERT_SUBREG into IMPLICIT_DEF), run the 512-bit VPABSQ,
// then extract the low part. (Result-extraction lines are not visible here.)
9459 let Predicates = [HasAVX512, NoVLX] in {
9460 def : Pat<(v4i64 (abs VR256X:$src)),
9463 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9465 def : Pat<(v2i64 (abs VR128X:$src)),
9468 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9472 // Use 512bit version to implement 128/256 bit.
// Generic NoVLX fallback: evaluate a unary op on 256/128-bit vectors by
// widening to 512 bits (INSERT_SUBREG into IMPLICIT_DEF), running the "Zrr"
// instruction, and taking back the original subregister.
9473 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
9474 AVX512VLVectorVTInfo _, Predicate prd> {
9475 let Predicates = [prd, NoVLX] in {
9476 def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9478 (!cast<Instruction>(InstrStr # "Zrr")
9479 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9481 _.info256.SubRegIdx)),
9482 _.info256.SubRegIdx)>;
9484 def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9486 (!cast<Instruction>(InstrStr # "Zrr")
9487 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9489 _.info128.SubRegIdx)),
9490 _.info128.SubRegIdx)>;
// CDI instructions: leading-zero count and conflict detection, D/Q widths,
// plus NoVLX fallbacks for VPLZCNT via the generic unary widening helper.
9494 // FIXME: Is there a better scheduler itinerary for VPLZCNT?
9495 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
9496 SSE_INTALU_ITINS_P, HasCDI>;
9498 // FIXME: Is there a better scheduler itinerary for VPCONFLICT?
9499 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
9500 SSE_INTALU_ITINS_P, HasCDI>;
9502 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9503 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
9504 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
9506 //===---------------------------------------------------------------------===//
9507 // Counts number of ones - VPOPCNTD and VPOPCNTQ
9508 //===---------------------------------------------------------------------===//
9510 // FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ?
// Population count, D/Q widths, gated on VPOPCNTDQ; NoVLX widening fallbacks.
9511 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
9512 SSE_INTALU_ITINS_P, HasVPOPCNTDQ>;
9514 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
9515 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
9517 //===---------------------------------------------------------------------===//
9518 // Replicate Single FP - MOVSHDUP and MOVSLDUP
9519 //===---------------------------------------------------------------------===//
// f32-only unary replicate ops; XS prefix distinguishes the two encodings.
9520 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
9522 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins,
9523 avx512vl_f32_info, HasAVX512>, XS;
9526 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>;
9527 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>;
9529 //===----------------------------------------------------------------------===//
9530 // AVX-512 - MOVDDUP
9531 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP: the memory form loads only one f64 scalar (ScalarMemOp,
// CD8VH half-vector tuple) and duplicates it, unlike the full-vector loads of
// the wider forms.
9533 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
9534 OpndItins itins, X86VectorVTInfo _> {
9535 let ExeDomain = _.ExeDomain in {
9536 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9537 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9538 (_.VT (OpNode (_.VT _.RC:$src))), itins.rr>, EVEX,
9539 Sched<[itins.Sched]>;
9540 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9541 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9542 (_.VT (OpNode (_.VT (scalar_to_vector
9543 (_.ScalarLdFrag addr:$src))))),
9544 itins.rm>, EVEX, EVEX_CD8<_.EltSize, CD8VH>,
9545 Sched<[itins.Sched.Folded]>;
// MOVDDUP across widths: 512/256 use the generic X86Movddup unary form; the
// 128-bit variant is modeled as an X86VBroadcast through avx512_movddup_128.
9549 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9550 OpndItins itins, AVX512VLVectorVTInfo VTInfo> {
9552 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info512>, EVEX_V512;
9554 let Predicates = [HasAVX512, HasVLX] in {
9555 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info256>,
9557 defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>,
// Top-level MOVDDUP wrapper fixing the f64 type info and the XD/VEX_W
// encoding bits.
9562 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
9564 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins,
9565 avx512vl_f64_info>, XD, VEX_W;
9568 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>;
// Select VMOVDDUPZ128 for v2f64 broadcasts: from a scalar or v2f64 load (rm),
// from a register f64 (rr, via COPY_TO_REGCLASS into VR128X), and the masked
// merge (k) / zeroing (kz) variants of each driven by a v2i1 vselect.
9570 let Predicates = [HasVLX] in {
9571 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
9572 (VMOVDDUPZ128rm addr:$src)>;
9573 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9574 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9575 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9576 (VMOVDDUPZ128rm addr:$src)>;
9578 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9579 (v2f64 VR128X:$src0)),
9580 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9581 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9582 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9583 (bitconvert (v4i32 immAllZerosV))),
9584 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9586 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9587 (v2f64 VR128X:$src0)),
9588 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9589 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9590 (bitconvert (v4i32 immAllZerosV))),
9591 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9593 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9594 (v2f64 VR128X:$src0)),
9595 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9596 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
9597 (bitconvert (v4i32 immAllZerosV))),
9598 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9601 //===----------------------------------------------------------------------===//
9602 // AVX-512 - Unpack Instructions
9603 //===----------------------------------------------------------------------===//
// FP unpack high/low, then the integer unpacks: byte/word forms need BWI,
// dword/qword forms only AVX512F.
9604 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9606 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
9609 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9610 SSE_INTALU_ITINS_P, HasBWI>;
9611 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9612 SSE_INTALU_ITINS_P, HasBWI>;
9613 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9614 SSE_INTALU_ITINS_P, HasBWI>;
9615 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9616 SSE_INTALU_ITINS_P, HasBWI>;
9618 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9619 SSE_INTALU_ITINS_P, HasAVX512>;
9620 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9621 SSE_INTALU_ITINS_P, HasAVX512>;
9622 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9623 SSE_INTALU_ITINS_P, HasAVX512>;
9624 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9625 SSE_INTALU_ITINS_P, HasAVX512>;
9627 //===----------------------------------------------------------------------===//
9628 // AVX-512 - Extract & Insert Integer Instructions
9629 //===----------------------------------------------------------------------===//
// Store-to-memory form of byte/word element extract: the extracted element is
// truncated to EltVT and stored to $dst.
9631 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9632 X86VectorVTInfo _> {
9633 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9634 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9635 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9636 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
9638 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd]>;
// VPEXTRB (BWI): register form zero-extends into GR32orGR64 via X86pextrb;
// memory form comes from avx512_extract_elt_bw_m with opcode 0x14.
9641 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9642 let Predicates = [HasBWI] in {
9643 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9644 (ins _.RC:$src1, u8imm:$src2),
9645 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9646 [(set GR32orGR64:$dst,
9647 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
9648 EVEX, TAPD, Sched<[WriteShuffle]>;
9650 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// VPEXTRW (BWI). The primary rr form is the legacy 0xC5 MRMSrcReg encoding;
// rr_REV is the alternate 0x15 MRMDestReg encoding (no pattern, ".s" mnemonic
// suffix, FoldGenData links it to rr for disassembly/folding purposes).
9654 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9655 let Predicates = [HasBWI] in {
9656 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9657 (ins _.RC:$src1, u8imm:$src2),
9658 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9659 [(set GR32orGR64:$dst,
9660 (X86pextrw (_.VT _.RC:$src1), imm:$src2))],
9661 IIC_SSE_PEXTRW>, EVEX, PD, Sched<[WriteShuffle]>;
9663 let hasSideEffects = 0 in
9664 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9665 (ins _.RC:$src1, u8imm:$src2),
9666 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
9667 IIC_SSE_PEXTRW>, EVEX, TAPD, FoldGenData<NAME#rr>,
9668 Sched<[WriteShuffle]>;
9670 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// VPEXTRD/VPEXTRQ (DQI): plain extractelt to a GPR (rr) or direct store (mr);
// GRC selects GR32 vs GR64.
9674 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9675 RegisterClass GRC> {
9676 let Predicates = [HasDQI] in {
9677 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9678 (ins _.RC:$src1, u8imm:$src2),
9679 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9681 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
9682 EVEX, TAPD, Sched<[WriteShuffle]>;
9684 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9685 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9686 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9687 [(store (extractelt (_.VT _.RC:$src1),
9688 imm:$src2),addr:$dst)]>,
9689 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
9690 Sched<[WriteShuffleLd]>;
// Element-extract instantiations; VPEXTRQ needs VEX_W for the 64-bit form.
9694 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
9695 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
9696 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9697 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Memory form of element insert: load the scalar via LdFrag and insert at
// the imm8 position.
9699 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9700 X86VectorVTInfo _, PatFrag LdFrag> {
9701 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9702 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9703 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9705 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
9706 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteShuffleLd, ReadAfterLd]>;
// Byte/word insert (BWI): register source comes from GR32orGR64; memory form
// delegates to avx512_insert_elt_m.
9709 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9710 X86VectorVTInfo _, PatFrag LdFrag> {
9711 let Predicates = [HasBWI] in {
9712 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9713 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
9714 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9716 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
9717 Sched<[WriteShuffle]>;
9719 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
// Dword/qword insert (DQI): plain insertelt from a GPR; memory form uses the
// type's natural scalar load fragment.
9723 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
9724 X86VectorVTInfo _, RegisterClass GRC> {
9725 let Predicates = [HasDQI] in {
9726 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9727 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
9728 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9730 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
9731 EVEX_4V, TAPD, Sched<[WriteShuffle]>;
9733 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
9734 _.ScalarLdFrag>, TAPD;
// Element-insert instantiations. VPINSRD and VPINSRQ intentionally share
// opcode 0x22; VEX_W distinguishes the 64-bit form.
9738 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
9739 extloadi8>, TAPD, VEX_WIG;
9740 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
9741 extloadi16>, PD, VEX_WIG;
9742 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
9743 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
9745 //===----------------------------------------------------------------------===//
9746 // VSHUFPS - VSHUFPD Operations
9747 //===----------------------------------------------------------------------===//
// VSHUFPS/VSHUFPD via the common 3-op + imm8 helper; VTInfo_FP drives the
// pattern types and the CD8 scale, VTInfo_I is accepted but unused here.
9749 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
9750 AVX512VLVectorVTInfo VTInfo_FP>{
9751 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
9752 SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
9753 AVX512AIi8Base, EVEX_4V;
9756 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
9757 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
9759 //===----------------------------------------------------------------------===//
9760 // AVX-512 - Byte shift Left/Right
9761 //===----------------------------------------------------------------------===//
// Itinerary bundle for the byte-shift instructions (VPSLLDQ/VPSRLDQ below).
9763 let Sched = WriteVecShift in
9764 def AVX512_BYTESHIFT : OpndItins<
9765 IIC_SSE_INTSHDQ_P_RI, IIC_SSE_INTSHDQ_P_RI
// Whole-vector byte shift by imm8; register (rr) and memory (rm) forms. The
// MRMr/MRMm Format parameters carry the opcode-extension reg field (e.g.
// MRM7r/MRM3r below), which is why left and right shifts share opcode 0x73.
9768 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
9769 Format MRMm, string OpcodeStr,
9770 OpndItins itins, X86VectorVTInfo _>{
9771 def rr : AVX512<opc, MRMr,
9772 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
9773 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9774 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))],
9775 itins.rr>, Sched<[itins.Sched]>;
9776 def rm : AVX512<opc, MRMm,
9777 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
9778 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9779 [(set _.RC:$dst,(_.VT (OpNode
9780 (_.VT (bitconvert (_.LdFrag addr:$src1))),
9781 (i8 imm:$src2))))], itins.rm>,
9782 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiate the byte shift at 512 (prd) and 256/128 (prd + HasVLX), always
// on i8 vectors; then define VPSLLDQ (/7) and VPSRLDQ (/3), both BWI-gated.
9785 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
9786 Format MRMm, string OpcodeStr,
9787 OpndItins itins, Predicate prd>{
9788 let Predicates = [prd] in
9789 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9790 OpcodeStr, itins, v64i8_info>, EVEX_V512;
9791 let Predicates = [prd, HasVLX] in {
9792 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9793 OpcodeStr, itins, v32i8x_info>, EVEX_V256;
9794 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9795 OpcodeStr, itins, v16i8x_info>, EVEX_V128;
9798 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
9799 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
9801 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
9802 AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base,
// PSADBW-style op: i8 sources (_src) produce i64 results (_dst), hence the
// separate destination/source type infos; rr and rm forms.
9806 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
9807 string OpcodeStr, OpndItins itins,
9808 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
9809 def rr : AVX512BI<opc, MRMSrcReg,
9810 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
9811 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9812 [(set _dst.RC:$dst,(_dst.VT
9813 (OpNode (_src.VT _src.RC:$src1),
9814 (_src.VT _src.RC:$src2))))], itins.rr>,
9815 Sched<[itins.Sched]>;
9816 def rm : AVX512BI<opc, MRMSrcMem,
9817 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
9818 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9819 [(set _dst.RC:$dst,(_dst.VT
9820 (OpNode (_src.VT _src.RC:$src1),
9821 (_src.VT (bitconvert
9822 (_src.LdFrag addr:$src2))))))], itins.rm>,
9823 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// VPSADBW at all widths: i8 inputs to i64 accumulators, BWI-gated; VL forms
// need HasVLX as usual.
9826 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
9827 string OpcodeStr, OpndItins itins,
9829 let Predicates = [prd] in
9830 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info,
9831 v64i8_info>, EVEX_V512;
9832 let Predicates = [prd, HasVLX] in {
9833 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info,
9834 v32i8x_info>, EVEX_V256;
9835 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info,
9836 v16i8x_info>, EVEX_V128;
9840 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
9841 SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG;
9843 // Transforms to swizzle an immediate to enable better matching when
9844 // memory operand isn't in the right place.
// Truth-table bit i of a VPTERNLOG imm8 is indexed by (op0,op1,op2) = bits
// (2,1,0) of i. Swapping op0 and op2 therefore swaps indices where those two
// bits differ: 1(001)<->4(100) and 3(011)<->6(110); bits 0,2,5,7 (mask 0xa5)
// are fixed points.
9845 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
9846 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
9847 uint8_t Imm = N->getZExtValue();
9848 // Swap bits 1/4 and 3/6.
9849 uint8_t NewImm = Imm & 0xa5;
9850 if (Imm & 0x02) NewImm |= 0x10;
9851 if (Imm & 0x10) NewImm |= 0x02;
9852 if (Imm & 0x08) NewImm |= 0x40;
9853 if (Imm & 0x40) NewImm |= 0x08;
9854 return getI8Imm(NewImm, SDLoc(N));
// Swapping op0 and op1 swaps truth-table indices where bit 2 and bit 1 of the
// index differ: 2(010)<->4(100) and 3(011)<->5(101); mask 0xc3 keeps the
// fixed points 0,1,6,7. (The original comment said "operand 1 and operand 2",
// which contradicts the bit swaps below and the 0-based numbering used by the
// 321/132 transforms.)
9856 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
9857 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
9858 uint8_t Imm = N->getZExtValue();
9859 // Swap bits 2/4 and 3/5.
9860 uint8_t NewImm = Imm & 0xc3;
9861 if (Imm & 0x04) NewImm |= 0x10;
9862 if (Imm & 0x10) NewImm |= 0x04;
9863 if (Imm & 0x08) NewImm |= 0x20;
9864 if (Imm & 0x20) NewImm |= 0x08;
9865 return getI8Imm(NewImm, SDLoc(N));
9867 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
9868 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
9869 uint8_t Imm = N->getZExtValue();
9870 // Swap bits 1/2 and 5/6.
9871 uint8_t NewImm = Imm & 0x99;
9872 if (Imm & 0x02) NewImm |= 0x04;
9873 if (Imm & 0x04) NewImm |= 0x02;
9874 if (Imm & 0x20) NewImm |= 0x40;
9875 if (Imm & 0x40) NewImm |= 0x20;
9876 return getI8Imm(NewImm, SDLoc(N));
9878 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
9879 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
9880 uint8_t Imm = N->getZExtValue();
9881 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
9882 uint8_t NewImm = Imm & 0x81;
9883 if (Imm & 0x02) NewImm |= 0x04;
9884 if (Imm & 0x04) NewImm |= 0x10;
9885 if (Imm & 0x08) NewImm |= 0x40;
9886 if (Imm & 0x10) NewImm |= 0x02;
9887 if (Imm & 0x20) NewImm |= 0x08;
9888 if (Imm & 0x40) NewImm |= 0x20;
9889 return getI8Imm(NewImm, SDLoc(N));
9891 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
9892 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
9893 uint8_t Imm = N->getZExtValue();
9894 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
9895 uint8_t NewImm = Imm & 0x81;
9896 if (Imm & 0x02) NewImm |= 0x10;
9897 if (Imm & 0x04) NewImm |= 0x02;
9898 if (Imm & 0x08) NewImm |= 0x20;
9899 if (Imm & 0x10) NewImm |= 0x04;
9900 if (Imm & 0x20) NewImm |= 0x40;
9901 if (Imm & 0x40) NewImm |= 0x08;
9902 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG instruction forms for one vector type: register (rri), memory
// (rmi), and element broadcast (rmbi), all tied ($src1 = $dst) since the
// hardware overwrites the first source. The trailing Pats let the matcher
// accept loads/broadcasts or the masked passthru operand in any source
// position by commuting operands and rewriting the imm8 with the
// VPTERNLOG*_imm8 transforms above.
// NOTE(review): several interior lines are missing from this copy (the
// embedded numbering skips, e.g. 9912-9913 inside the rri pattern and the
// vselect passthru operands of many Pats); verify against upstream.
9905 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
9906 OpndItins itins, X86VectorVTInfo _>{
9907 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9908 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9909 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
9910 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9911 (OpNode (_.VT _.RC:$src1),
9914 (i8 imm:$src4)), itins.rr, 1, 1>,
9915 AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
9916 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9917 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
9918 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9919 (OpNode (_.VT _.RC:$src1),
9921 (_.VT (bitconvert
9922 (_.LdFrag addr:$src2))))))], itins.rm>,
9922 (i8 imm:$src4)), itins.rm, 1, 0>,
9923 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
9924 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9925 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9926 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
9927 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
9928 "$src2, ${src3}"##_.BroadcastStr##", $src4",
9929 (OpNode (_.VT _.RC:$src1),
9931 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
9932 (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
9933 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
9934 Sched<[itins.Sched.Folded, ReadAfterLd]>;
9935 }// Constraints = "$src1 = $dst"
9937 // Additional patterns for matching passthru operand in other positions.
9938 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9939 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9941 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9942 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9943 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9944 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
9946 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9947 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9949 // Additional patterns for matching loads in other positions.
9950 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
9951 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
9952 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9953 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9954 def : Pat<(_.VT (OpNode _.RC:$src1,
9955 (bitconvert (_.LdFrag addr:$src3)),
9956 _.RC:$src2, (i8 imm:$src4))),
9957 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9958 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9960 // Additional patterns for matching zero masking with loads in other
9960 // positions.
9962 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9963 (OpNode (bitconvert (_.LdFrag addr:$src3)),
9964 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9966 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9967 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9968 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9969 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9970 _.RC:$src2, (i8 imm:$src4)),
9972 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9973 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9975 // Additional patterns for matching masked loads with different
9975 // operand orders.
9977 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9978 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9979 _.RC:$src2, (i8 imm:$src4)),
9981 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9982 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9983 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9984 (OpNode (bitconvert (_.LdFrag addr:$src3)),
9985 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9987 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9988 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9989 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9990 (OpNode _.RC:$src2, _.RC:$src1,
9991 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
9993 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9994 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9995 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9996 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
9997 _.RC:$src1, (i8 imm:$src4)),
9999 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10000 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10001 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10002 (OpNode (bitconvert (_.LdFrag addr:$src3)),
10003 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10005 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
10006 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
10008 // Additional patterns for matching broadcasts in other positions.
10009 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10010 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
10011 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10012 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10013 def : Pat<(_.VT (OpNode _.RC:$src1,
10014 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10015 _.RC:$src2, (i8 imm:$src4))),
10016 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
10017 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10019 // Additional patterns for matching zero masking with broadcasts in other
10019 // positions.
10021 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10022 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10023 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10025 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10026 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10027 (VPTERNLOG321_imm8 imm:$src4))>;
10028 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10029 (OpNode _.RC:$src1,
10030 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10031 _.RC:$src2, (i8 imm:$src4)),
10033 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
10034 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
10035 (VPTERNLOG132_imm8 imm:$src4))>;
10037 // Additional patterns for matching masked broadcasts with different
10037 // operand orders.
10039 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10040 (OpNode _.RC:$src1,
10041 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10042 _.RC:$src2, (i8 imm:$src4)),
10044 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10045 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
10046 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10047 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10048 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
10050 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10051 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
10052 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10053 (OpNode _.RC:$src2, _.RC:$src1,
10054 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10055 (i8 imm:$src4)), _.RC:$src1)),
10056 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10057 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
10058 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10059 (OpNode _.RC:$src2,
10060 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10061 _.RC:$src1, (i8 imm:$src4)),
10063 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10064 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
10065 def : Pat<(_.VT (vselect _.KRCWM:$mask,
10066 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
10067 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
10069 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
10070 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Instantiates avx512_ternlog at 512 (HasAVX512) and 128/256 (plus HasVLX)
// bit widths; opcode 0x25 for both the dword and qword variants, which are
// distinguished by VEX_W on the Q form below.
10073 multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
10074 AVX512VLVectorVTInfo _> {
10075 let Predicates = [HasAVX512] in
10076 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
10077 let Predicates = [HasAVX512, HasVLX] in {
10078 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
10079 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
10083 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
10084 avx512vl_i32_info>;
10085 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
10086 avx512vl_i64_info>, VEX_W;
10088 //===----------------------------------------------------------------------===//
10089 // AVX-512 - FixupImm
10090 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM forms for one vector type: register (rri), full memory
// (rmi), and element broadcast (rmbi). $src1 is tied to $dst; the third
// source is interpreted as an integer table (_.IntVT). All forms here use
// the current rounding mode (FROUND_CURRENT); the SAE form lives in
// avx512_fixupimm_packed_sae below.
10092 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
10093 OpndItins itins, X86VectorVTInfo _>{
10094 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10095 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10096 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10097 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10098 (OpNode (_.VT _.RC:$src1),
10100 (_.IntVT _.RC:$src3),
10102 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10103 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10104 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
10105 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10106 (OpNode (_.VT _.RC:$src1),
10108 (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
10110 (i32 FROUND_CURRENT)), itins.rm>,
10111 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10112 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
10113 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10114 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10115 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10116 (OpNode (_.VT _.RC:$src1),
10118 (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10120 (i32 FROUND_CURRENT)), itins.rm>,
10121 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10122 } // Constraints = "$src1 = $dst"
// Register-only suppress-all-exceptions ({sae}) variant of packed
// VFIXUPIMM: same operands as rri but emits FROUND_NO_EXC and carries
// EVEX_B to encode the SAE bit.
10125 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
10126 SDNode OpNode, OpndItins itins,
10127 X86VectorVTInfo _>{
10128 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10129 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10130 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10131 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10132 "$src2, $src3, {sae}, $src4",
10133 (OpNode (_.VT _.RC:$src1),
10135 (_.IntVT _.RC:$src3),
10137 (i32 FROUND_NO_EXC)), itins.rr>,
10138 EVEX_B, Sched<[itins.Sched]>;
// Scalar VFIXUPIMM (SS/SD) forms: register (rri), register + SAE (rrib),
// and scalar-memory (rmi). The third source uses a separate integer vector
// type (_src3VT), matching the instruction's integer-table operand.
10142 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
10143 OpndItins itins, X86VectorVTInfo _,
10144 X86VectorVTInfo _src3VT> {
10145 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10146 ExeDomain = _.ExeDomain in {
10147 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10148 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10149 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10150 (OpNode (_.VT _.RC:$src1),
10152 (_src3VT.VT _src3VT.RC:$src3),
10154 (i32 FROUND_CURRENT)), itins.rr>, Sched<[itins.Sched]>;
10155 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10156 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10157 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10158 "$src2, $src3, {sae}, $src4",
10159 (OpNode (_.VT _.RC:$src1),
10161 (_src3VT.VT _src3VT.RC:$src3),
// NOTE(review): rrib is a register-only form but is tagged with the
// itins.rm (memory) itinerary below — looks like a copy-paste; confirm
// against upstream before changing.
10163 (i32 FROUND_NO_EXC)), itins.rm>,
10164 EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
10165 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10166 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10167 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10168 (OpNode (_.VT _.RC:$src1),
10170 (_src3VT.VT (scalar_to_vector
10171 (_src3VT.ScalarLdFrag addr:$src3))),
10173 (i32 FROUND_CURRENT)), itins.rm>,
10174 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Instantiates packed VFIXUPIMM at all widths; the 512-bit form also gets
// the SAE variant (SAE is only encodable at 512-bit / scalar).
10178 multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> {
10179 let Predicates = [HasAVX512] in
10180 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10182 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins,
10183 _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512;
10184 let Predicates = [HasAVX512, HasVLX] in {
10185 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10186 _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128;
10187 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins,
10188 _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256;
// Scalar forms (opcode 0x55) and packed forms (opcode 0x54); SS/PS vs
// SD/PD are distinguished by VEX_W and the CD8 element size.
10192 defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10193 SSE_ALU_F32S, f32x_info, v4i32x_info>,
10194 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10195 defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10196 SSE_ALU_F64S, f64x_info, v2i64x_info>,
10197 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10198 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>,
10199 EVEX_CD8<32, CD8VF>;
10200 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>,
10201 EVEX_CD8<64, CD8VF>, VEX_W;
10205 // Patterns used to select SSE scalar fp arithmetic instructions from
10208 // (1) a scalar fp operation followed by a blend
10210 // The effect is that the backend no longer emits unnecessary vector
10211 // insert instructions immediately after SSE scalar fp instructions
10212 // like addss or mulss.
10214 // For example, given the following code:
10215 // __m128 foo(__m128 A, __m128 B) {
10220 // Previously we generated:
10221 // addss %xmm0, %xmm1
10222 // movss %xmm1, %xmm0
10224 // We now generate:
10225 // addss %xmm1, %xmm0
10227 // (2) a vector packed single/double fp operation followed by a vector insert
10229 // The effect is that the backend converts the packed fp instruction
10230 // followed by a vector insert into a single SSE scalar fp instruction.
10232 // For example, given the following code:
10233 // __m128 foo(__m128 A, __m128 B) {
10234 // __m128 C = A + B;
10235 //     return (__m128) {C[0], A[1], A[2], A[3]};
10238 // Previously we generated:
10239 // addps %xmm0, %xmm1
10240 // movss %xmm1, %xmm0
10242 // We now generate:
10243 // addss %xmm1, %xmm0
10245 // TODO: Some canonicalization in lowering would simplify the number of
10246 // patterns we have to try to match.
// Selection patterns mapping (scalar f32 op + movss blend) onto the EVEX
// scalar instructions (V<Op>SSZrr_Int and its masked _Intk form), so no
// separate vector-insert is emitted after the scalar op.
10247 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10248 let Predicates = [HasAVX512] in {
10249 // extracted scalar math op with insert via movss
10250 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10251 (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10253 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10254 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10256 // vector math op with insert via movss
10257 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10258 (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
10259 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10261 // extracted masked scalar math op with insert via movss
10262 def : Pat<(X86Movss (v4f32 VR128X:$src1),
10264 (X86selects VK1WM:$mask,
10265 (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10268 (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10269 VK1WM:$mask, v4f32:$src1,
10270 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
// One instantiation per scalar arithmetic op.
10274 defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10275 defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10276 defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10277 defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
// f64 counterpart of AVX512_scalar_math_f32_patterns: maps (scalar f64 op
// + movsd blend) onto V<Op>SDZrr_Int / _Intk.
10279 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10280 let Predicates = [HasAVX512] in {
10281 // extracted scalar math op with insert via movsd
10282 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10283 (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10285 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10286 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10288 // vector math op with insert via movsd
10289 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10290 (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
10291 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10293 // extracted masked scalar math op with insert via movsd
10294 def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10296 (X86selects VK1WM:$mask,
10297 (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10300 (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10301 VK1WM:$mask, v2f64:$src1,
10302 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
10306 defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10307 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10308 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10309 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
10311 //===----------------------------------------------------------------------===//
10312 // AES instructions
10313 //===----------------------------------------------------------------------===//
// EVEX-encoded VAES forms: reuses the SSE AESI_binop_rm_int multiclass with
// the width-specific intrinsic (base name, "_256", "_512"). 128/256-bit
// need VLX + VAES; 512-bit needs AVX512 + VAES.
10315 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
10316 let Predicates = [HasVLX, HasVAES] in {
10317 defm Z128 : AESI_binop_rm_int<Op, OpStr,
10318 !cast<Intrinsic>(IntPrefix),
10319 loadv2i64, 0, VR128X, i128mem>,
10320 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
10321 defm Z256 : AESI_binop_rm_int<Op, OpStr,
10322 !cast<Intrinsic>(IntPrefix##"_256"),
10323 loadv4i64, 0, VR256X, i256mem>,
10324 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
10326 let Predicates = [HasAVX512, HasVAES] in
10327 defm Z : AESI_binop_rm_int<Op, OpStr,
10328 !cast<Intrinsic>(IntPrefix##"_512"),
10329 loadv8i64, 0, VR512, i512mem>,
10330 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
10333 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
10334 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
10335 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
10336 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
10338 //===----------------------------------------------------------------------===//
10339 // PCLMUL instructions - Carry less multiplication
10340 //===----------------------------------------------------------------------===//
// EVEX-encoded carry-less multiply, reusing the SSE vpclmulqdq multiclass
// with width-specific intrinsics; 128/256-bit require VLX.
10342 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
10343 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
10344 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
10346 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
10347 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
10348 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
10350 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
10351 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
10352 EVEX_CD8<64, CD8VF>, VEX_WIG;
// Assembler aliases (e.g. vpclmullqlqdq) for the immediate-selected forms.
10356 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
10357 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
10358 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
10360 //===----------------------------------------------------------------------===//
// VBMI2 (concat-shift vpshldv/vpshrdv etc.) — the section title line
// appears to be missing from this copy (embedded numbering skips 10361).
10362 //===----------------------------------------------------------------------===//
// Register and full-memory forms of a 3-source variable concat-shift; the
// first source is tied to the destination.
10364 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
10365 OpndItins itins, X86VectorVTInfo VTI> {
10366 let Constraints = "$src1 = $dst",
10367 ExeDomain = VTI.ExeDomain in {
10368 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10369 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10370 "$src3, $src2", "$src2, $src3",
10371 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
10372 itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
10373 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10374 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10375 "$src3, $src2", "$src2, $src3",
10376 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10377 (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
10378 itins.rm>, AVX512FMA3Base,
10379 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Extends VBMI2_shift_var_rm (which it inherits) with an element-broadcast
// memory form (EVEX_B); used by the d/q element sizes only, since word
// elements cannot be broadcast-embedded.
10383 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10384 OpndItins itins, X86VectorVTInfo VTI>
10385 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
10386 let Constraints = "$src1 = $dst",
10387 ExeDomain = VTI.ExeDomain in
10388 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10389 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
10390 "${src3}"##VTI.BroadcastStr##", $src2",
10391 "$src2, ${src3}"##VTI.BroadcastStr,
10392 (OpNode VTI.RC:$src1, VTI.RC:$src2,
10393 (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
10394 itins.rm>, AVX512FMA3Base, EVEX_B,
10395 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Width expansion for the non-broadcast variant: 512-bit needs VBMI2,
// 256/128-bit additionally need VLX.
10398 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
10399 OpndItins itins, AVX512VLVectorVTInfo VTI> {
10400 let Predicates = [HasVBMI2] in
10401 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10402 let Predicates = [HasVBMI2, HasVLX] in {
10403 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10404 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
// Same width expansion for the broadcast-capable variant.
10408 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
10409 OpndItins itins, AVX512VLVectorVTInfo VTI> {
10410 let Predicates = [HasVBMI2] in
10411 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
10412 let Predicates = [HasVBMI2, HasVLX] in {
10413 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
10414 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
// Element-size expansion: W uses its own opcode (wOp) and no broadcast; D
// and Q share dqOp and are distinguished by VEX_W, with broadcast support.
10417 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
10418 SDNode OpNode, OpndItins itins> {
10419 defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
10420 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
10421 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
10422 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10423 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
10424 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate-count forms of the same concat shifts (vpshld/vpshrd).
10427 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
10428 SDNode OpNode, OpndItins itins> {
10429 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
10430 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
10431 VEX_W, EVEX_CD8<16, CD8VF>;
10432 defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
10433 OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10434 defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
10435 itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// Concat-shift left/right, variable-count and immediate-count variants.
10439 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
10440 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
10441 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
10442 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
// VBMI2 byte/word compress and expand, built on the existing per-element
// compress/expand multiclasses; W forms take VEX_W.
10445 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS,
10446 avx512vl_i8_info, HasVBMI2>, EVEX;
10447 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS,
10448 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10450 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND,
10451 avx512vl_i8_info, HasVBMI2>, EVEX;
10452 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND,
10453 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
10455 //===----------------------------------------------------------------------===//
// AVX-512 VNNI (vpdpbusd[s]/vpdpwssd[s] dot-product accumulate) — the
// section title line appears to be missing from this copy.
10457 //===----------------------------------------------------------------------===//
// Register, memory and dword-broadcast forms of a 3-source VNNI op with the
// accumulator ($src1) tied to the destination.
10459 let Constraints = "$src1 = $dst" in
10460 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
10461 OpndItins itins, X86VectorVTInfo VTI> {
10462 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
10463 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
10464 "$src3, $src2", "$src2, $src3",
10465 (VTI.VT (OpNode VTI.RC:$src1,
10466 VTI.RC:$src2, VTI.RC:$src3)),
10467 itins.rr>, EVEX_4V, T8PD, Sched<[itins.Sched]>;
10468 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10469 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
10470 "$src3, $src2", "$src2, $src3",
10471 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
10472 (VTI.VT (bitconvert
10473 (VTI.LdFrag addr:$src3))))),
10474 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
10475 Sched<[itins.Sched.Folded, ReadAfterLd]>;
10476 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10477 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
10478 OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
10479 "$src2, ${src3}"##VTI.BroadcastStr,
10480 (OpNode VTI.RC:$src1, VTI.RC:$src2,
10481 (VTI.VT (X86VBroadcast
10482 (VTI.ScalarLdFrag addr:$src3)))),
10483 itins.rm>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
10484 T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Width expansion: 512-bit under HasVNNI, 256/128 additionally under VLX.
10487 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> {
10488 let Predicates = [HasVNNI] in
10489 defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512;
10490 let Predicates = [HasVNNI, HasVLX] in {
10491 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256;
10492 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128;
10496 // FIXME: Is there a better scheduler itinerary for VPDP?
10497 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>;
10498 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>;
10499 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>;
10500 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>;
10502 //===----------------------------------------------------------------------===//
// Bit Algorithms (BITALG): byte/word population count — the section title
// line appears to be missing from this copy.
10504 //===----------------------------------------------------------------------===//
10506 // FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW?
10507 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P,
10508 avx512vl_i8_info, HasBITALG>;
10509 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P,
10510 avx512vl_i16_info, HasBITALG>, VEX_W;
// Lowering patterns for narrower vectors widened to the supported widths.
10512 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
10513 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// VPSHUFBITQMB: bit-gather producing a mask-register result (KRC dest), so
// it is built on AVX512_maskable_cmp rather than the value-masking helpers.
10515 multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> {
10516 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
10517 (ins VTI.RC:$src1, VTI.RC:$src2),
10519 "$src2, $src1", "$src1, $src2",
10520 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10521 (VTI.VT VTI.RC:$src2)), itins.rr>, EVEX_4V, T8PD,
10522 Sched<[itins.Sched]>;
10523 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
10524 (ins VTI.RC:$src1, VTI.MemOp:$src2),
10526 "$src2, $src1", "$src1, $src2",
10527 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
10528 (VTI.VT (bitconvert (VTI.LdFrag addr:$src2)))),
10529 itins.rm>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
10530 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Width expansion under HasBITALG (+ VLX for 128/256-bit).
10533 multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> {
10534 let Predicates = [HasBITALG] in
10535 defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512;
10536 let Predicates = [HasBITALG, HasVLX] in {
10537 defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256;
10538 defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128;
10542 // FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB?
10543 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>;
10545 //===----------------------------------------------------------------------===//
// GFNI (Galois field byte arithmetic) — the section title line appears to
// be missing from this copy.
10547 //===----------------------------------------------------------------------===//
// Width expansion for the GF(2^8) byte multiply; all widths require GFNI +
// BWI, with VLX for the sub-512-bit forms.
10549 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
10550 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10551 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info,
10552 SSE_INTALU_ITINS_P, 1>, EVEX_V512;
10553 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10554 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info,
10555 SSE_INTALU_ITINS_P, 1>, EVEX_V256;
10556 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info,
10557 SSE_INTALU_ITINS_P, 1>, EVEX_V128;
10561 defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>,
10562 EVEX_CD8<8, CD8VF>, T8PD;
// Adds a qword-broadcast form (EVEX_B) to the inherited reg/mem imm8
// affine-transform forms; the broadcast element type comes from BcstVTI
// (an i64 vector) while the operation itself is on bytes.
10564 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
10565 OpndItins itins, X86VectorVTInfo VTI,
10566 X86VectorVTInfo BcstVTI>
10567 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
10568 let ExeDomain = VTI.ExeDomain in
10569 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
10570 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
10571 OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1",
10572 "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
10573 (OpNode (VTI.VT VTI.RC:$src1),
10574 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
10575 (i8 imm:$src3)), itins.rm>, EVEX_B,
10576 Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Width expansion: byte vectors paired with the matching i64 broadcast
// type at each width.
10579 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
10581 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
10582 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info,
10583 v8i64_info>, EVEX_V512;
10584 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
10585 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info,
10586 v4i64x_info>, EVEX_V256;
10587 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info,
10588 v2i64x_info>, EVEX_V128;
10592 defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
10593 X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
10594 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
10595 defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
10596 X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
10597 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;