1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the X86 AVX512 instruction set, defining the
10 // instructions, and properties of the instructions which are needed for code
11 // generation, machine code emission, and analysis.
13 //===----------------------------------------------------------------------===//
15 // Group template arguments that can be derived from the vector type (EltNum x
16 // EltVT). These are things like the register class for the writemask, etc.
17 // The idea is to pass one of these as the template argument rather than the
18 // individual arguments.
19 // The template is also used for scalar types, in this case numelts is 1.
20 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
22 RegisterClass RC = rc;
23 ValueType EltVT = eltvt;
24 int NumElts = numelts;
26 // Corresponding mask register class.
27 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
29 // Corresponding mask register pair class.
30 RegisterOperand KRPC = !if (!gt(NumElts, 16), ?,
31 !cast<RegisterOperand>("VK" # NumElts # "Pair"));
33 // Corresponding write-mask register class.
34 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// The mask value type, e.g. v16i1 for a 16-element vector.
37 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
39 // Suffix used in the instruction mnemonic.
40 string Suffix = suffix;
42 // VTName is a string name for vector VT. For vector types it will be
43 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
44 // It is a little bit complex for scalar types, where NumElts = 1.
45 // In this case we build v4f32 or v2f64
46 string VTName = "v" # !if (!eq (NumElts, 1),
47 !if (!eq (EltVT.Size, 32), 4,
48 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The vector value type, reconstructed from the string name above.
51 ValueType VT = !cast<ValueType>(VTName);
53 string EltTypeName = !cast<string>(EltVT);
54 // Size of the element type in bits, e.g. 32 for v16i32.
55 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
56 int EltSize = EltVT.Size;
58 // "i" for integer types and "f" for floating-point types
59 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
61 // Size of RC in bits, e.g. 512 for VR512.
64 // The corresponding memory operand, e.g. i512mem for VR512.
65 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
66 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
67 // FP scalar memory operand for intrinsics - ssmem/sdmem.
68 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
69 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
// Load pattern fragments for this VT: whole-vector, aligned whole-vector,
// scalar-element, and broadcast loads.
72 PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
74 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" # VTName);
76 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
77 PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
// Scalar-intrinsic memory fragments (sse_load_f32/sse_load_f64) for FP types.
79 PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
80 !cast<PatFrags>("sse_load_f32"),
81 !if (!eq (EltTypeName, "f64"),
82 !cast<PatFrags>("sse_load_f64"),
85 // The string to specify embedded broadcast in assembly.
86 string BroadcastStr = "{1to" # NumElts # "}";
88 // 8-bit compressed displacement tuple/subvector format. This is only
89 // defined for NumElts <= 8.
90 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
91 !cast<CD8VForm>("CD8VT" # NumElts), ?);
// Subregister index for the low 128/256 bits of a wider register.
93 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
94 !if (!eq (Size, 256), sub_ymm, ?));
// Execution domain: packed-single for f32, packed-double for f64.
96 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
97 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// Scalar FP register class matching the element type.
100 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
// A dag matching an all-zeros vector of this VT, used for zero-masking.
102 dag ImmAllZerosV = (VT immAllZerosV);
// Suffix appended to instruction names: Z128/Z256 for VLX forms, Z for 512-bit.
104 string ZSuffix = !if (!eq (Size, 128), "Z128",
105 !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector types (RC = VR512).
108 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
109 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
110 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
111 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
112 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
113 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
115 // "x" in v32i8x_info means RC = VR256X
116 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
117 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
118 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
119 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
120 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
121 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector types (RC = VR128X).
123 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
124 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
125 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
126 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
127 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
128 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
130 // We map scalar types to the smallest (128-bit) vector type
131 // with the appropriate element type. This allows to use the same masking logic.
132 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
133 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
134 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
135 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Bundles the 512/256/128-bit VTInfo records for one element type so a
// single multiclass can instantiate all three VL sizes.
137 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
138 X86VectorVTInfo i128> {
139 X86VectorVTInfo info512 = i512;
140 X86VectorVTInfo info256 = i256;
141 X86VectorVTInfo info128 = i128;
// One bundle per element type, 512-bit info first.
144 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
146 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
148 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
150 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
152 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
154 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Groups the register classes for a mask (k-register) vector type:
// the plain mask class and its write-mask variant.
157 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
159 RegisterClass KRC = _krc;
160 RegisterClass KRCWM = _krcwm;
// One record per supported mask width (1..64 bits).
164 def v1i1_info : X86KVectorVTInfo<VK1, VK1WM, v1i1>;
165 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
166 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
167 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
168 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
169 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
170 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
172 // Used for matching masked operations. Ensures the operation part only has a
// single use, so forming the masked instruction is profitable (checked by
// isProfitableToFormMaskedOp in C++).
174 def vselect_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
175 (vselect node:$mask, node:$src1, node:$src2), [{
176 return isProfitableToFormMaskedOp(N);
// Scalar analogue of vselect_mask, built on X86selects.
179 def X86selects_mask : PatFrag<(ops node:$mask, node:$src1, node:$src2),
180 (X86selects node:$mask, node:$src1, node:$src2), [{
181 return isProfitableToFormMaskedOp(N);
184 // This multiclass generates the masking variants from the non-masking
185 // variant. It only provides the assembly pieces for the masking variants.
186 // It assumes custom ISel patterns for masking which can be provided as
187 // template arguments.
188 multiclass AVX512_maskable_custom<bits<8> O, Format F,
190 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
192 string AttSrcAsm, string IntelSrcAsm,
194 list<dag> MaskingPattern,
195 list<dag> ZeroMaskingPattern,
196 string MaskingConstraint = "",
197 bit IsCommutable = 0,
198 bit IsKCommutable = 0,
199 bit IsKZCommutable = IsCommutable> {
// Unmasked form: NAME.
200 let isCommutable = IsCommutable in
201 def NAME: AVX512<O, F, Outs, Ins,
202 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
203 "$dst, "#IntelSrcAsm#"}",
// Merge-masking form: NAME#k, writes "$dst {$mask}".
206 // Prefer over VMOV*rrk Pat<>
207 let isCommutable = IsKCommutable in
208 def NAME#k: AVX512<O, F, Outs, MaskingIns,
209 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
210 "$dst {${mask}}, "#IntelSrcAsm#"}",
213 // In case of the 3src subclass this is overridden with a let.
214 string Constraints = MaskingConstraint;
217 // Zero mask does not add any restrictions to commute operands transformation.
218 // So, it is Ok to use IsCommutable instead of IsKCommutable.
219 let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
220 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
221 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
222 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
228 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Builds the three ISel patterns (unmasked, merge-masked, zero-masked) from
// RHS/MaskingRHS and forwards them to AVX512_maskable_custom.
229 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
231 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
233 string AttSrcAsm, string IntelSrcAsm,
234 dag RHS, dag MaskingRHS,
235 SDPatternOperator Select = vselect_mask,
236 string MaskingConstraint = "",
237 bit IsCommutable = 0,
238 bit IsKCommutable = 0,
239 bit IsKZCommutable = IsCommutable> :
240 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
241 AttSrcAsm, IntelSrcAsm,
242 [(set _.RC:$dst, RHS)],
243 [(set _.RC:$dst, MaskingRHS)],
// Zero-masking pattern selects between RHS and an all-zeros vector.
245 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
246 MaskingConstraint, IsCommutable,
247 IsKCommutable, IsKZCommutable>;
249 // This multiclass generates the unconditional/non-masking, the masking and
250 // the zero-masking variant of the vector instruction. In the masking case, the
251 // preserved vector elements come from a new dummy input operand tied to $dst.
252 // This version uses a separate dag for non-masking and masking.
253 multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
254 dag Outs, dag Ins, string OpcodeStr,
255 string AttSrcAsm, string IntelSrcAsm,
256 dag RHS, dag MaskRHS,
257 bit IsCommutable = 0, bit IsKCommutable = 0,
258 bit IsKZCommutable = IsCommutable> :
259 AVX512_maskable_custom<O, F, Outs, Ins,
// Masked ins prepend the pass-through ($src0) and mask operands.
260 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
261 !con((ins _.KRCWM:$mask), Ins),
262 OpcodeStr, AttSrcAsm, IntelSrcAsm,
263 [(set _.RC:$dst, RHS)],
// Merge-masking: falsy lanes come from $src0.
265 (vselect_mask _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
// Zero-masking: falsy lanes are zeroed.
267 (vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
268 "$src0 = $dst", IsCommutable, IsKCommutable,
271 // This multiclass generates the unconditional/non-masking, the masking and
272 // the zero-masking variant of the vector instruction. In the masking case, the
273 // preserved vector elements come from a new dummy input operand tied to $dst.
274 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
275 dag Outs, dag Ins, string OpcodeStr,
276 string AttSrcAsm, string IntelSrcAsm,
278 bit IsCommutable = 0, bit IsKCommutable = 0,
279 bit IsKZCommutable = IsCommutable,
280 SDPatternOperator Select = vselect_mask> :
281 AVX512_maskable_common<O, F, _, Outs, Ins,
// Masked ins prepend the pass-through ($src0) and mask operands.
282 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
283 !con((ins _.KRCWM:$mask), Ins),
284 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
285 (Select _.KRCWM:$mask, RHS, _.RC:$src0),
286 Select, "$src0 = $dst", IsCommutable, IsKCommutable,
289 // This multiclass generates the unconditional/non-masking, the masking and
290 // the zero-masking variant of the scalar instruction.
// Same as AVX512_maskable but selects with X86selects_mask (element 0 only)
// and disables all commutation.
291 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
292 dag Outs, dag Ins, string OpcodeStr,
293 string AttSrcAsm, string IntelSrcAsm,
295 AVX512_maskable<O, F, _, Outs, Ins, OpcodeStr, AttSrcAsm, IntelSrcAsm,
296 RHS, 0, 0, 0, X86selects_mask>;
298 // Similar to AVX512_maskable but in this case one of the source operands
299 // ($src1) is already tied to $dst so we just use that for the preserved
300 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
302 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
303 dag Outs, dag NonTiedIns, string OpcodeStr,
304 string AttSrcAsm, string IntelSrcAsm,
306 bit IsCommutable = 0,
307 bit IsKCommutable = 0,
308 SDPatternOperator Select = vselect_mask,
310 AVX512_maskable_common<O, F, _, Outs,
// $src1 is prepended to every ins list; it doubles as the pass-through.
311 !con((ins _.RC:$src1), NonTiedIns),
312 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
313 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
314 OpcodeStr, AttSrcAsm, IntelSrcAsm,
// MaskOnly suppresses the unmasked pattern via null_frag.
315 !if(MaskOnly, (null_frag), RHS),
316 (Select _.KRCWM:$mask, RHS, _.RC:$src1),
317 Select, "", IsCommutable, IsKCommutable>;
319 // Similar to AVX512_maskable_3src but in this case the input VT for the tied
320 // operand differs from the output VT. This requires a bitconvert on
321 // the preserved vector going into the vselect.
322 // NOTE: The unmasked pattern is disabled.
323 multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
324 X86VectorVTInfo InVT,
325 dag Outs, dag NonTiedIns, string OpcodeStr,
326 string AttSrcAsm, string IntelSrcAsm,
327 dag RHS, bit IsCommutable = 0> :
328 AVX512_maskable_common<O, F, OutVT, Outs,
329 !con((ins InVT.RC:$src1), NonTiedIns),
330 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
331 !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
// null_frag disables the unmasked ISel pattern entirely.
332 OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
// bitconvert adapts the tied input's VT to the output VT for the select.
333 (vselect_mask InVT.KRCWM:$mask, RHS,
334 (bitconvert InVT.RC:$src1)),
335 vselect_mask, "", IsCommutable>;
// Scalar variant of AVX512_maskable_3src: uses X86selects_mask so masking
// only applies to element 0.
337 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
338 dag Outs, dag NonTiedIns, string OpcodeStr,
339 string AttSrcAsm, string IntelSrcAsm,
341 bit IsCommutable = 0,
342 bit IsKCommutable = 0,
344 AVX512_maskable_3src<O, F, _, Outs, NonTiedIns, OpcodeStr, AttSrcAsm,
345 IntelSrcAsm, RHS, IsCommutable, IsKCommutable,
346 X86selects_mask, MaskOnly>;
// Assembler-only maskable form: emits the unmasked pattern verbatim and
// leaves the k/kz variants pattern-less (empty lists).
348 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
351 string AttSrcAsm, string IntelSrcAsm,
353 AVX512_maskable_custom<O, F, Outs, Ins,
354 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
355 !con((ins _.KRCWM:$mask), Ins),
356 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
// 3-source assembler-only form: $src1 is the tied pass-through operand.
359 multiclass AVX512_maskable_3src_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
360 dag Outs, dag NonTiedIns,
362 string AttSrcAsm, string IntelSrcAsm,
364 AVX512_maskable_custom<O, F, Outs,
365 !con((ins _.RC:$src1), NonTiedIns),
366 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
367 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
368 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
371 // Instruction with mask that puts result in mask register,
372 // like "compare" and "vptest"
// Only unmasked and merge-masked (EVEX_K) forms exist; there is no
// zero-masking variant for mask-register destinations.
373 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
375 dag Ins, dag MaskingIns,
377 string AttSrcAsm, string IntelSrcAsm,
379 list<dag> MaskingPattern,
380 bit IsCommutable = 0> {
381 let isCommutable = IsCommutable in {
382 def NAME: AVX512<O, F, Outs, Ins,
383 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
384 "$dst, "#IntelSrcAsm#"}",
387 def NAME#k: AVX512<O, F, Outs, MaskingIns,
388 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
389 "$dst {${mask}}, "#IntelSrcAsm#"}",
390 MaskingPattern>, EVEX_K;
// Builds the set-to-KRC patterns for compare-style instructions and
// forwards them to AVX512_maskable_custom_cmp.
394 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
396 dag Ins, dag MaskingIns,
398 string AttSrcAsm, string IntelSrcAsm,
399 dag RHS, dag MaskingRHS,
400 bit IsCommutable = 0> :
401 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
402 AttSrcAsm, IntelSrcAsm,
403 [(set _.KRC:$dst, RHS)],
404 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Compare with mask: masked result is (and $mask, RHS_su), where RHS_su is
// a single-use-restricted form of the compare.
406 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
407 dag Outs, dag Ins, string OpcodeStr,
408 string AttSrcAsm, string IntelSrcAsm,
409 dag RHS, dag RHS_su, bit IsCommutable = 0> :
410 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
411 !con((ins _.KRCWM:$mask), Ins),
412 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
413 (and _.KRCWM:$mask, RHS_su), IsCommutable>;
415 // Used by conversion instructions.
// Unlike AVX512_maskable, the caller supplies all three RHS dags explicitly
// because conversions often need distinct merge/zero-mask patterns.
416 multiclass AVX512_maskable_cvt<bits<8> O, Format F, X86VectorVTInfo _,
418 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
420 string AttSrcAsm, string IntelSrcAsm,
421 dag RHS, dag MaskingRHS, dag ZeroMaskingRHS> :
422 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
423 AttSrcAsm, IntelSrcAsm,
424 [(set _.RC:$dst, RHS)],
425 [(set _.RC:$dst, MaskingRHS)],
426 [(set _.RC:$dst, ZeroMaskingRHS)],
// FMA variant: like AVX512_maskable_3src ($src1 tied to $dst as the
// pass-through) but takes separate RHS/MaskingRHS dags.
429 multiclass AVX512_maskable_fma<bits<8> O, Format F, X86VectorVTInfo _,
430 dag Outs, dag NonTiedIns, string OpcodeStr,
431 string AttSrcAsm, string IntelSrcAsm,
432 dag RHS, dag MaskingRHS, bit IsCommutable,
434 AVX512_maskable_custom<O, F, Outs,
435 !con((ins _.RC:$src1), NonTiedIns),
436 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
437 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
438 OpcodeStr, AttSrcAsm, IntelSrcAsm,
439 [(set _.RC:$dst, RHS)],
// Merge-masking preserves lanes from the tied $src1.
441 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.RC:$src1))],
443 (vselect_mask _.KRCWM:$mask, MaskingRHS, _.ImmAllZerosV))],
444 "", IsCommutable, IsKCommutable>;
446 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
447 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
448 // swizzled by ExecutionDomainFix to pxor.
449 // We set canFoldAsLoad because this can be converted to a constant-pool
450 // load of an all-zeros value if folding it would be beneficial.
451 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
452 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
453 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
454 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
455 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
456 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
// Route the remaining 512-bit all-zeros types through the same pseudo.
459 let Predicates = [HasAVX512] in {
460 def : Pat<(v64i8 immAllZerosV), (AVX512_512_SET0)>;
461 def : Pat<(v32i16 immAllZerosV), (AVX512_512_SET0)>;
462 def : Pat<(v8i64 immAllZerosV), (AVX512_512_SET0)>;
463 def : Pat<(v16f32 immAllZerosV), (AVX512_512_SET0)>;
464 def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>;
467 // Alias instructions that allow VPTERNLOG to be used with a mask to create
468 // a mix of all ones and all zeros elements. This is done this way to force
469 // the same register to be used as input for all three sources.
470 let isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteVecALU] in {
// 32-bit-element sign-extend of a 16-bit mask: ones where mask is set.
471 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
472 (ins VK16WM:$mask), "",
473 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
474 (v16i32 immAllOnesV),
475 (v16i32 immAllZerosV)))]>;
// 64-bit-element counterpart with an 8-bit mask.
476 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
477 (ins VK8WM:$mask), "",
478 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
480 (v8i64 immAllZerosV)))]>;
// 128/256-bit zero pseudos, mirroring AVX512_512_SET0 for the VLX widths.
483 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
484 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
485 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
486 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
487 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
488 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
// Route the remaining 128/256-bit all-zeros types through the pseudos.
491 let Predicates = [HasAVX512] in {
492 def : Pat<(v8i16 immAllZerosV), (AVX512_128_SET0)>;
493 def : Pat<(v16i8 immAllZerosV), (AVX512_128_SET0)>;
494 def : Pat<(v2i64 immAllZerosV), (AVX512_128_SET0)>;
495 def : Pat<(v4f32 immAllZerosV), (AVX512_128_SET0)>;
496 def : Pat<(v2f64 immAllZerosV), (AVX512_128_SET0)>;
497 def : Pat<(v32i8 immAllZerosV), (AVX512_256_SET0)>;
498 def : Pat<(v16i16 immAllZerosV), (AVX512_256_SET0)>;
499 def : Pat<(v4i64 immAllZerosV), (AVX512_256_SET0)>;
500 def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
501 def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
504 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
505 // This is expanded by ExpandPostRAPseudos.
506 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
507 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
508 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
509 [(set FR32X:$dst, fp32imm0)]>;
510 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
511 [(set FR64X:$dst, fp64imm0)]>;
512 def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
513 [(set VR128X:$dst, fp128imm0)]>;
516 //===----------------------------------------------------------------------===//
517 // AVX-512 - VECTOR INSERT
520 // Supports two different pattern operators for mask and unmasked ops. Allows
521 // null_frag to be passed for one.
// Defines register (rr) and memory (rm) forms of VINSERTxXxN with masking.
// vinsert_insert drives the unmasked pattern, vinsert_for_mask the masked
// one; either may be null_frag to disable that half.
522 multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
524 SDPatternOperator vinsert_insert,
525 SDPatternOperator vinsert_for_mask,
526 X86FoldableSchedWrite sched> {
527 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
528 defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
529 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
// Mnemonic, e.g. "vinsertf32x4" built from element type and count.
530 "vinsert" # From.EltTypeName # "x" # From.NumElts,
531 "$src3, $src2, $src1", "$src1, $src2, $src3",
532 (vinsert_insert:$src3 (To.VT To.RC:$src1),
533 (From.VT From.RC:$src2),
535 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
536 (From.VT From.RC:$src2),
538 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
// Memory form folds the inserted subvector load.
540 defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
541 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
542 "vinsert" # From.EltTypeName # "x" # From.NumElts,
543 "$src3, $src2, $src1", "$src1, $src2, $src3",
544 (vinsert_insert:$src3 (To.VT To.RC:$src1),
545 (From.VT (From.LdFrag addr:$src2)),
547 (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
548 (From.VT (From.LdFrag addr:$src2)),
549 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
550 EVEX_CD8<From.EltSize, From.CD8TupleForm>,
551 Sched<[sched.Folded, sched.ReadAfterFold]>;
555 // Passes the same pattern operator for masked and unmasked ops.
556 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
558 SDPatternOperator vinsert_insert,
559 X86FoldableSchedWrite sched> :
560 vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>;
// Emits lowering Pats that select an existing VINSERT instruction (named by
// InstrStr) for additional From/To type combinations; the SDNodeXForm
// converts the matched insert index to the instruction immediate.
562 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
563 X86VectorVTInfo To, PatFrag vinsert_insert,
564 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
565 let Predicates = p in {
// Register form.
566 def : Pat<(vinsert_insert:$ins
567 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
568 (To.VT (!cast<Instruction>(InstrStr#"rr")
569 To.RC:$src1, From.RC:$src2,
570 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Load-folding form.
572 def : Pat<(vinsert_insert:$ins
574 (From.VT (From.LdFrag addr:$src2)),
576 (To.VT (!cast<Instruction>(InstrStr#"rm")
577 To.RC:$src1, addr:$src2,
578 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates the full family of insert instructions for one F/I prefix:
// 32x4/64x4 base forms plus the DQI-only 64x2/32x8 forms.
582 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
583 ValueType EltVT64, int Opcode256,
584 X86FoldableSchedWrite sched> {
586 let Predicates = [HasVLX] in
587 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
588 X86VectorVTInfo< 4, EltVT32, VR128X>,
589 X86VectorVTInfo< 8, EltVT32, VR256X>,
590 vinsert128_insert, sched>, EVEX_V256;
592 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
593 X86VectorVTInfo< 4, EltVT32, VR128X>,
594 X86VectorVTInfo<16, EltVT32, VR512>,
595 vinsert128_insert, sched>, EVEX_V512;
597 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
598 X86VectorVTInfo< 4, EltVT64, VR256X>,
599 X86VectorVTInfo< 8, EltVT64, VR512>,
600 vinsert256_insert, sched>, VEX_W, EVEX_V512;
602 // Even with DQI we'd like to only use these instructions for masking.
603 let Predicates = [HasVLX, HasDQI] in
604 defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
605 X86VectorVTInfo< 2, EltVT64, VR128X>,
606 X86VectorVTInfo< 4, EltVT64, VR256X>,
// null_frag unmasked pattern: unmasked inserts use the 32x4 form instead.
607 null_frag, vinsert128_insert, sched>,
610 // Even with DQI we'd like to only use these instructions for masking.
611 let Predicates = [HasDQI] in {
612 defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
613 X86VectorVTInfo< 2, EltVT64, VR128X>,
614 X86VectorVTInfo< 8, EltVT64, VR512>,
615 null_frag, vinsert128_insert, sched>,
618 defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
619 X86VectorVTInfo< 8, EltVT32, VR256X>,
620 X86VectorVTInfo<16, EltVT32, VR512>,
621 null_frag, vinsert256_insert, sched>,
626 // FIXME: Is there a better scheduler class for VINSERTF/VINSERTI?
627 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>;
628 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>;
630 // Codegen pattern with the alternative types,
631 // Even with AVX512DQ we'll still use these for unmasked operations.
632 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
633 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
634 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
635 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
637 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
638 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
639 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
640 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
642 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
643 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
644 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
645 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
647 // Codegen pattern with the alternative types insert VEC128 into VEC256
648 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
649 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
650 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
651 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
652 // Codegen pattern with the alternative types insert VEC128 into VEC512
653 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
654 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
655 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
656 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
657 // Codegen pattern with the alternative types insert VEC256 into VEC512
658 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
659 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
660 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
661 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Selects a masked VINSERT for masked inserts where the mask's element type
// (Cast) differs from the insert's From/To types; bitcasts reconcile the VTs.
// Emits rrk/rmk (merge) and rrkz/rmkz (zero) pattern variants.
664 multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
665 X86VectorVTInfo To, X86VectorVTInfo Cast,
666 PatFrag vinsert_insert,
667 SDNodeXForm INSERT_get_vinsert_imm,
669 let Predicates = p in {
// Merge-masked, register source.
671 (vselect_mask Cast.KRCWM:$mask,
673 (vinsert_insert:$ins (To.VT To.RC:$src1),
674 (From.VT From.RC:$src2),
677 (!cast<Instruction>(InstrStr#"rrk")
678 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
679 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Merge-masked, memory source.
681 (vselect_mask Cast.KRCWM:$mask,
683 (vinsert_insert:$ins (To.VT To.RC:$src1),
686 (From.LdFrag addr:$src2))),
689 (!cast<Instruction>(InstrStr#"rmk")
690 Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
691 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masked, register source.
694 (vselect_mask Cast.KRCWM:$mask,
696 (vinsert_insert:$ins (To.VT To.RC:$src1),
697 (From.VT From.RC:$src2),
700 (!cast<Instruction>(InstrStr#"rrkz")
701 Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
702 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Zero-masked, memory source.
704 (vselect_mask Cast.KRCWM:$mask,
706 (vinsert_insert:$ins (To.VT To.RC:$src1),
707 (From.VT (From.LdFrag addr:$src2)),
710 (!cast<Instruction>(InstrStr#"rmkz")
711 Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
712 (INSERT_get_vinsert_imm To.RC:$ins))>;
// Mask-cast instantiations: 128-bit inserts into 256-bit vectors.
716 defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
717 v8f32x_info, vinsert128_insert,
718 INSERT_get_vinsert128_imm, [HasVLX]>;
719 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
720 v4f64x_info, vinsert128_insert,
721 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
723 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
724 v8i32x_info, vinsert128_insert,
725 INSERT_get_vinsert128_imm, [HasVLX]>;
726 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
727 v8i32x_info, vinsert128_insert,
728 INSERT_get_vinsert128_imm, [HasVLX]>;
729 defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
730 v8i32x_info, vinsert128_insert,
731 INSERT_get_vinsert128_imm, [HasVLX]>;
732 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
733 v4i64x_info, vinsert128_insert,
734 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
735 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
736 v4i64x_info, vinsert128_insert,
737 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
738 defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
739 v4i64x_info, vinsert128_insert,
740 INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
// 128-bit inserts into 512-bit vectors.
742 defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
743 v16f32_info, vinsert128_insert,
744 INSERT_get_vinsert128_imm, [HasAVX512]>;
745 defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
746 v8f64_info, vinsert128_insert,
747 INSERT_get_vinsert128_imm, [HasDQI]>;
749 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
750 v16i32_info, vinsert128_insert,
751 INSERT_get_vinsert128_imm, [HasAVX512]>;
752 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
753 v16i32_info, vinsert128_insert,
754 INSERT_get_vinsert128_imm, [HasAVX512]>;
755 defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
756 v16i32_info, vinsert128_insert,
757 INSERT_get_vinsert128_imm, [HasAVX512]>;
758 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
759 v8i64_info, vinsert128_insert,
760 INSERT_get_vinsert128_imm, [HasDQI]>;
761 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
762 v8i64_info, vinsert128_insert,
763 INSERT_get_vinsert128_imm, [HasDQI]>;
764 defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
765 v8i64_info, vinsert128_insert,
766 INSERT_get_vinsert128_imm, [HasDQI]>;
// 256-bit inserts into 512-bit vectors.
768 defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
769 v16f32_info, vinsert256_insert,
770 INSERT_get_vinsert256_imm, [HasDQI]>;
771 defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
772 v8f64_info, vinsert256_insert,
773 INSERT_get_vinsert256_imm, [HasAVX512]>;
775 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
776 v16i32_info, vinsert256_insert,
777 INSERT_get_vinsert256_imm, [HasDQI]>;
778 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
779 v16i32_info, vinsert256_insert,
780 INSERT_get_vinsert256_imm, [HasDQI]>;
781 defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
782 v16i32_info, vinsert256_insert,
783 INSERT_get_vinsert256_imm, [HasDQI]>;
784 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
785 v8i64_info, vinsert256_insert,
786 INSERT_get_vinsert256_imm, [HasAVX512]>;
787 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
788 v8i64_info, vinsert256_insert,
789 INSERT_get_vinsert256_imm, [HasAVX512]>;
790 defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
791 v8i64_info, vinsert256_insert,
792 INSERT_get_vinsert256_imm, [HasAVX512]>;
794 // vinsertps - insert f32 to XMM
795 let ExeDomain = SSEPackedSingle in {
796 let isCommutable = 1 in
// Register form: insert a float element per the imm8 control byte.
797 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
798 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
799 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
800 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
801 EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Memory form: the f32 source is loaded and widened via scalar_to_vector.
802 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
803 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
804 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
805 [(set VR128X:$dst, (X86insertps VR128X:$src1,
806 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
808 EVEX_4V, EVEX_CD8<32, CD8VT1>,
809 Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
812 //===----------------------------------------------------------------------===//
813 // AVX-512 VECTOR EXTRACT
816 // Supports two different pattern operators for mask and unmasked ops. Allows
817 // null_frag to be passed for one.
818 multiclass vextract_for_size_split<int Opcode,
819 X86VectorVTInfo From, X86VectorVTInfo To,
820 SDPatternOperator vextract_extract,
821 SDPatternOperator vextract_for_mask,
822 SchedWrite SchedRR, SchedWrite SchedMR> {
824 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
825 defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
826 (ins From.RC:$src1, u8imm:$idx),
827 "vextract" # To.EltTypeName # "x" # To.NumElts,
828 "$idx, $src1", "$src1, $idx",
829 (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
830 (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
831 AVX512AIi8Base, EVEX, Sched<[SchedRR]>;
833 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
834 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
835 "vextract" # To.EltTypeName # "x" # To.NumElts #
836 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
837 [(store (To.VT (vextract_extract:$idx
838 (From.VT From.RC:$src1), (iPTR imm))),
842 let mayStore = 1, hasSideEffects = 0 in
843 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
844 (ins To.MemOp:$dst, To.KRCWM:$mask,
845 From.RC:$src1, u8imm:$idx),
846 "vextract" # To.EltTypeName # "x" # To.NumElts #
847 "\t{$idx, $src1, $dst {${mask}}|"
848 "$dst {${mask}}, $src1, $idx}", []>,
849 EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
853 // Passes the same pattern operator for masked and unmasked ops.
854 multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
856 SDPatternOperator vextract_extract,
857 SchedWrite SchedRR, SchedWrite SchedMR> :
858 vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract, SchedRR, SchedMR>;
860 // Codegen pattern for the alternative types
861 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
862 X86VectorVTInfo To, PatFrag vextract_extract,
863 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
864 let Predicates = p in {
865 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
866 (To.VT (!cast<Instruction>(InstrStr#"rr")
868 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
869 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
870 (iPTR imm))), addr:$dst),
871 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
872 (EXTRACT_get_vextract_imm To.RC:$ext))>;
876 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
877 ValueType EltVT64, int Opcode256,
878 SchedWrite SchedRR, SchedWrite SchedMR> {
879 let Predicates = [HasAVX512] in {
880 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
881 X86VectorVTInfo<16, EltVT32, VR512>,
882 X86VectorVTInfo< 4, EltVT32, VR128X>,
883 vextract128_extract, SchedRR, SchedMR>,
884 EVEX_V512, EVEX_CD8<32, CD8VT4>;
885 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
886 X86VectorVTInfo< 8, EltVT64, VR512>,
887 X86VectorVTInfo< 4, EltVT64, VR256X>,
888 vextract256_extract, SchedRR, SchedMR>,
889 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
891 let Predicates = [HasVLX] in
892 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
893 X86VectorVTInfo< 8, EltVT32, VR256X>,
894 X86VectorVTInfo< 4, EltVT32, VR128X>,
895 vextract128_extract, SchedRR, SchedMR>,
896 EVEX_V256, EVEX_CD8<32, CD8VT4>;
898 // Even with DQI we'd like to only use these instructions for masking.
899 let Predicates = [HasVLX, HasDQI] in
900 defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
901 X86VectorVTInfo< 4, EltVT64, VR256X>,
902 X86VectorVTInfo< 2, EltVT64, VR128X>,
903 null_frag, vextract128_extract, SchedRR, SchedMR>,
904 VEX_W1X, EVEX_V256, EVEX_CD8<64, CD8VT2>;
906 // Even with DQI we'd like to only use these instructions for masking.
907 let Predicates = [HasDQI] in {
908 defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
909 X86VectorVTInfo< 8, EltVT64, VR512>,
910 X86VectorVTInfo< 2, EltVT64, VR128X>,
911 null_frag, vextract128_extract, SchedRR, SchedMR>,
912 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
913 defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
914 X86VectorVTInfo<16, EltVT32, VR512>,
915 X86VectorVTInfo< 8, EltVT32, VR256X>,
916 null_frag, vextract256_extract, SchedRR, SchedMR>,
917 EVEX_V512, EVEX_CD8<32, CD8VT8>;
921 // TODO - replace WriteFStore/WriteVecStore with X86SchedWriteMoveLSWidths types.
922 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b, WriteFShuffle256, WriteFStore>;
923 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b, WriteShuffle256, WriteVecStore>;
925 // extract_subvector codegen patterns with the alternative types.
926 // Even with AVX512DQ we'll still use these for unmasked operations.
927 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
928 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
929 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
930 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
932 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
933 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
934 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
935 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
937 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
938 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
939 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
940 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
942 // Codegen pattern with the alternative types extract VEC128 from VEC256
943 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
944 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
945 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
946 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
948 // Codegen pattern with the alternative types extract VEC128 from VEC512
949 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
950 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
951 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
952 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
953 // Codegen pattern with the alternative types extract VEC256 from VEC512
954 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
955 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
956 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
957 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
960 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
961 // smaller extract to enable EVEX->VEX.
962 let Predicates = [NoVLX] in {
963 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
964 (v2i64 (VEXTRACTI128rr
965 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
967 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
968 (v2f64 (VEXTRACTF128rr
969 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
971 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
972 (v4i32 (VEXTRACTI128rr
973 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
975 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
976 (v4f32 (VEXTRACTF128rr
977 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
979 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
980 (v8i16 (VEXTRACTI128rr
981 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
983 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
984 (v16i8 (VEXTRACTI128rr
985 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
989 // A 128-bit extract from bits [255:128] of a 512-bit vector should use a
990 // smaller extract to enable EVEX->VEX.
991 let Predicates = [HasVLX] in {
992 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 2))),
993 (v2i64 (VEXTRACTI32x4Z256rr
994 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm)),
996 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 2))),
997 (v2f64 (VEXTRACTF32x4Z256rr
998 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm)),
1000 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 4))),
1001 (v4i32 (VEXTRACTI32x4Z256rr
1002 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm)),
1004 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 4))),
1005 (v4f32 (VEXTRACTF32x4Z256rr
1006 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm)),
1008 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 8))),
1009 (v8i16 (VEXTRACTI32x4Z256rr
1010 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm)),
1012 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
1013 (v16i8 (VEXTRACTI32x4Z256rr
1014 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm)),
1019 // Additional patterns for handling a bitcast between the vselect and the
1020 // extract_subvector.
1021 multiclass vextract_for_mask_cast<string InstrStr, X86VectorVTInfo From,
1022 X86VectorVTInfo To, X86VectorVTInfo Cast,
1023 PatFrag vextract_extract,
1024 SDNodeXForm EXTRACT_get_vextract_imm,
1025 list<Predicate> p> {
1026 let Predicates = p in {
1027 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1029 (To.VT (vextract_extract:$ext
1030 (From.VT From.RC:$src), (iPTR imm)))),
1032 (Cast.VT (!cast<Instruction>(InstrStr#"rrk")
1033 Cast.RC:$src0, Cast.KRCWM:$mask, From.RC:$src,
1034 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1036 def : Pat<(Cast.VT (vselect_mask Cast.KRCWM:$mask,
1038 (To.VT (vextract_extract:$ext
1039 (From.VT From.RC:$src), (iPTR imm)))),
1040 Cast.ImmAllZerosV)),
1041 (Cast.VT (!cast<Instruction>(InstrStr#"rrkz")
1042 Cast.KRCWM:$mask, From.RC:$src,
1043 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
1047 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
1048 v4f32x_info, vextract128_extract,
1049 EXTRACT_get_vextract128_imm, [HasVLX]>;
1050 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z256", v8f32x_info, v4f32x_info,
1051 v2f64x_info, vextract128_extract,
1052 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1054 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
1055 v4i32x_info, vextract128_extract,
1056 EXTRACT_get_vextract128_imm, [HasVLX]>;
1057 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
1058 v4i32x_info, vextract128_extract,
1059 EXTRACT_get_vextract128_imm, [HasVLX]>;
1060 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
1061 v4i32x_info, vextract128_extract,
1062 EXTRACT_get_vextract128_imm, [HasVLX]>;
1063 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v8i32x_info, v4i32x_info,
1064 v2i64x_info, vextract128_extract,
1065 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1066 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v16i16x_info, v8i16x_info,
1067 v2i64x_info, vextract128_extract,
1068 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1069 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z256", v32i8x_info, v16i8x_info,
1070 v2i64x_info, vextract128_extract,
1071 EXTRACT_get_vextract128_imm, [HasDQI, HasVLX]>;
1073 defm : vextract_for_mask_cast<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
1074 v4f32x_info, vextract128_extract,
1075 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1076 defm : vextract_for_mask_cast<"VEXTRACTF64x2Z", v16f32_info, v4f32x_info,
1077 v2f64x_info, vextract128_extract,
1078 EXTRACT_get_vextract128_imm, [HasDQI]>;
1080 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
1081 v4i32x_info, vextract128_extract,
1082 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1083 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
1084 v4i32x_info, vextract128_extract,
1085 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1086 defm : vextract_for_mask_cast<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
1087 v4i32x_info, vextract128_extract,
1088 EXTRACT_get_vextract128_imm, [HasAVX512]>;
1089 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v16i32_info, v4i32x_info,
1090 v2i64x_info, vextract128_extract,
1091 EXTRACT_get_vextract128_imm, [HasDQI]>;
1092 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v32i16_info, v8i16x_info,
1093 v2i64x_info, vextract128_extract,
1094 EXTRACT_get_vextract128_imm, [HasDQI]>;
1095 defm : vextract_for_mask_cast<"VEXTRACTI64x2Z", v64i8_info, v16i8x_info,
1096 v2i64x_info, vextract128_extract,
1097 EXTRACT_get_vextract128_imm, [HasDQI]>;
1099 defm : vextract_for_mask_cast<"VEXTRACTF32x8Z", v8f64_info, v4f64x_info,
1100 v8f32x_info, vextract256_extract,
1101 EXTRACT_get_vextract256_imm, [HasDQI]>;
1102 defm : vextract_for_mask_cast<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
1103 v4f64x_info, vextract256_extract,
1104 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1106 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v8i64_info, v4i64x_info,
1107 v8i32x_info, vextract256_extract,
1108 EXTRACT_get_vextract256_imm, [HasDQI]>;
1109 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v32i16_info, v16i16x_info,
1110 v8i32x_info, vextract256_extract,
1111 EXTRACT_get_vextract256_imm, [HasDQI]>;
1112 defm : vextract_for_mask_cast<"VEXTRACTI32x8Z", v64i8_info, v32i8x_info,
1113 v8i32x_info, vextract256_extract,
1114 EXTRACT_get_vextract256_imm, [HasDQI]>;
1115 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
1116 v4i64x_info, vextract256_extract,
1117 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1118 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
1119 v4i64x_info, vextract256_extract,
1120 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1121 defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
1122 v4i64x_info, vextract256_extract,
1123 EXTRACT_get_vextract256_imm, [HasAVX512]>;
1125 // vextractps - extract 32 bits from XMM
1126 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
1127 (ins VR128X:$src1, u8imm:$src2),
1128 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1129 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
1130 EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
1132 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
1133 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
1134 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
1135 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
1137 EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
1139 //===---------------------------------------------------------------------===//
1140 // AVX-512 BROADCAST
1142 // broadcast with a scalar argument.
1143 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
1145 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
1146 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
1147 (!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
1148 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1149 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1150 (X86VBroadcast SrcInfo.FRC:$src),
1151 DestInfo.RC:$src0)),
1152 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrk)
1153 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
1154 (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1155 def : Pat<(DestInfo.VT (vselect_mask DestInfo.KRCWM:$mask,
1156 (X86VBroadcast SrcInfo.FRC:$src),
1157 DestInfo.ImmAllZerosV)),
1158 (!cast<Instruction>(Name#DestInfo.ZSuffix#rrkz)
1159 DestInfo.KRCWM:$mask, (SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
1162 // Split version to allow mask and broadcast node to be different types. This
1163 // helps support the 32x2 broadcasts.
1164 multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
1166 SchedWrite SchedRR, SchedWrite SchedRM,
1167 X86VectorVTInfo MaskInfo,
1168 X86VectorVTInfo DestInfo,
1169 X86VectorVTInfo SrcInfo,
1170 bit IsConvertibleToThreeAddress,
1171 SDPatternOperator UnmaskedOp = X86VBroadcast,
1172 SDPatternOperator UnmaskedBcastOp = SrcInfo.BroadcastLdFrag> {
1173 let hasSideEffects = 0 in
1174 def rr : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst), (ins SrcInfo.RC:$src),
1175 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1176 [(set MaskInfo.RC:$dst,
1180 (UnmaskedOp (SrcInfo.VT SrcInfo.RC:$src))))))],
1181 DestInfo.ExeDomain>, T8PD, EVEX, Sched<[SchedRR]>;
1182 def rrkz : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1183 (ins MaskInfo.KRCWM:$mask, SrcInfo.RC:$src),
1184 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1185 "${dst} {${mask}} {z}, $src}"),
1186 [(set MaskInfo.RC:$dst,
1187 (vselect_mask MaskInfo.KRCWM:$mask,
1191 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1192 MaskInfo.ImmAllZerosV))],
1193 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ, Sched<[SchedRR]>;
1194 let Constraints = "$src0 = $dst" in
1195 def rrk : AVX512PI<opc, MRMSrcReg, (outs MaskInfo.RC:$dst),
1196 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1198 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1199 "${dst} {${mask}}, $src}"),
1200 [(set MaskInfo.RC:$dst,
1201 (vselect_mask MaskInfo.KRCWM:$mask,
1205 (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
1206 MaskInfo.RC:$src0))],
1207 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K, Sched<[SchedRR]>;
1209 let hasSideEffects = 0, mayLoad = 1 in
1210 def rm : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1211 (ins SrcInfo.ScalarMemOp:$src),
1212 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1213 [(set MaskInfo.RC:$dst,
1217 (UnmaskedBcastOp addr:$src)))))],
1218 DestInfo.ExeDomain>, T8PD, EVEX,
1219 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1221 def rmkz : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1222 (ins MaskInfo.KRCWM:$mask, SrcInfo.ScalarMemOp:$src),
1223 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
1224 "${dst} {${mask}} {z}, $src}"),
1225 [(set MaskInfo.RC:$dst,
1226 (vselect_mask MaskInfo.KRCWM:$mask,
1230 (SrcInfo.BroadcastLdFrag addr:$src)))),
1231 MaskInfo.ImmAllZerosV))],
1232 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_KZ,
1233 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1235 let Constraints = "$src0 = $dst",
1236 isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
1237 def rmk : AVX512PI<opc, MRMSrcMem, (outs MaskInfo.RC:$dst),
1238 (ins MaskInfo.RC:$src0, MaskInfo.KRCWM:$mask,
1239 SrcInfo.ScalarMemOp:$src),
1240 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}}|",
1241 "${dst} {${mask}}, $src}"),
1242 [(set MaskInfo.RC:$dst,
1243 (vselect_mask MaskInfo.KRCWM:$mask,
1247 (SrcInfo.BroadcastLdFrag addr:$src)))),
1248 MaskInfo.RC:$src0))],
1249 DestInfo.ExeDomain>, T8PD, EVEX, EVEX_K,
1250 EVEX_CD8<SrcInfo.EltSize, CD8VT1>, Sched<[SchedRM]>;
1253 // Helper class to force mask and broadcast result to same type.
1254 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
1255 SchedWrite SchedRR, SchedWrite SchedRM,
1256 X86VectorVTInfo DestInfo,
1257 X86VectorVTInfo SrcInfo,
1258 bit IsConvertibleToThreeAddress> :
1259 avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
1260 DestInfo, DestInfo, SrcInfo,
1261 IsConvertibleToThreeAddress>;
1263 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
1264 AVX512VLVectorVTInfo _> {
1265 let Predicates = [HasAVX512] in {
1266 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1267 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1268 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1273 let Predicates = [HasVLX] in {
1274 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1275 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1276 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1282 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
1283 AVX512VLVectorVTInfo _> {
1284 let Predicates = [HasAVX512] in {
1285 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1286 WriteFShuffle256Ld, _.info512, _.info128, 1>,
1287 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
1292 let Predicates = [HasVLX] in {
1293 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1294 WriteFShuffle256Ld, _.info256, _.info128, 1>,
1295 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
1298 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
1299 WriteFShuffle256Ld, _.info128, _.info128, 1>,
1300 avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
1305 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
1307 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
1308 avx512vl_f64_info>, VEX_W1X;
1310 multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
1311 X86VectorVTInfo _, SDPatternOperator OpNode,
1312 RegisterClass SrcRC> {
1313 // Fold with a mask even if it has multiple uses since it is cheap.
1314 let ExeDomain = _.ExeDomain in
1315 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
1317 "vpbroadcast"#_.Suffix, "$src", "$src",
1318 (_.VT (OpNode SrcRC:$src)), /*IsCommutable*/0,
1319 /*IsKCommutable*/0, /*IsKZCommutable*/0, vselect>,
1320 T8PD, EVEX, Sched<[SchedRR]>;
1323 multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
1324 X86VectorVTInfo _, SDPatternOperator OpNode,
1325 RegisterClass SrcRC, SubRegIndex Subreg> {
1326 let hasSideEffects = 0, ExeDomain = _.ExeDomain in
1327 defm rr : AVX512_maskable_custom<opc, MRMSrcReg,
1328 (outs _.RC:$dst), (ins GR32:$src),
1329 !con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
1330 !con((ins _.KRCWM:$mask), (ins GR32:$src)),
1331 "vpbroadcast"#_.Suffix, "$src", "$src", [], [], [],
1332 "$src0 = $dst">, T8PD, EVEX, Sched<[SchedRR]>;
1334 def : Pat <(_.VT (OpNode SrcRC:$src)),
1335 (!cast<Instruction>(Name#rr)
1336 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1338 // Fold with a mask even if it has multiple uses since it is cheap.
1339 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.RC:$src0),
1340 (!cast<Instruction>(Name#rrk) _.RC:$src0, _.KRCWM:$mask,
1341 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1343 def : Pat <(vselect _.KRCWM:$mask, (_.VT (OpNode SrcRC:$src)), _.ImmAllZerosV),
1344 (!cast<Instruction>(Name#rrkz) _.KRCWM:$mask,
1345 (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SrcRC:$src, Subreg)))>;
1348 multiclass avx512_int_broadcastbw_reg_vl<bits<8> opc, string Name,
1349 AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
1350 RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
1351 let Predicates = [prd] in
1352 defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
1353 OpNode, SrcRC, Subreg>, EVEX_V512;
1354 let Predicates = [prd, HasVLX] in {
1355 defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
1356 _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
1357 defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
1358 _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
1362 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
1363 SDPatternOperator OpNode,
1364 RegisterClass SrcRC, Predicate prd> {
1365 let Predicates = [prd] in
1366 defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
1368 let Predicates = [prd, HasVLX] in {
1369 defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
1371 defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
1376 defm VPBROADCASTBr : avx512_int_broadcastbw_reg_vl<0x7A, "VPBROADCASTBr",
1377 avx512vl_i8_info, X86VBroadcast, GR8, sub_8bit, HasBWI>;
1378 defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
1379 avx512vl_i16_info, X86VBroadcast, GR16, sub_16bit,
1381 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1382 X86VBroadcast, GR32, HasAVX512>;
1383 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1384 X86VBroadcast, GR64, HasAVX512>, VEX_W;
1386 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1387 AVX512VLVectorVTInfo _, Predicate prd,
1388 bit IsConvertibleToThreeAddress> {
1389 let Predicates = [prd] in {
1390 defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1391 WriteShuffle256Ld, _.info512, _.info128,
1392 IsConvertibleToThreeAddress>,
1395 let Predicates = [prd, HasVLX] in {
1396 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
1397 WriteShuffle256Ld, _.info256, _.info128,
1398 IsConvertibleToThreeAddress>,
1400 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
1401 WriteShuffleXLd, _.info128, _.info128,
1402 IsConvertibleToThreeAddress>,
1407 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1408 avx512vl_i8_info, HasBWI, 0>;
1409 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1410 avx512vl_i16_info, HasBWI, 0>;
1411 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1412 avx512vl_i32_info, HasAVX512, 1>;
1413 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1414 avx512vl_i64_info, HasAVX512, 1>, VEX_W1X;
1416 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1417 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1418 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1419 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1420 (_Dst.VT (X86SubVBroadcast
1421 (_Src.VT (_Src.LdFrag addr:$src))))>,
1422 Sched<[SchedWriteShuffle.YMM.Folded]>,
1426 // This should be used for the AVX512DQ broadcast instructions. It disables
1427 // the unmasked patterns so that we only use the DQ instructions when masking
1429 multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
1430 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1431 let hasSideEffects = 0, mayLoad = 1 in
1432 defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1433 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1435 (_Dst.VT (X86SubVBroadcast
1436 (_Src.VT (_Src.LdFrag addr:$src))))>,
1437 Sched<[SchedWriteShuffle.YMM.Folded]>,
1441 //===----------------------------------------------------------------------===//
1442 // AVX-512 BROADCAST SUBVECTORS
1445 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1446 v16i32_info, v4i32x_info>,
1447 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1448 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1449 v16f32_info, v4f32x_info>,
1450 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1451 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1452 v8i64_info, v4i64x_info>, VEX_W,
1453 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1454 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1455 v8f64_info, v4f64x_info>, VEX_W,
1456 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1458 let Predicates = [HasAVX512] in {
1459 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1460 (VBROADCASTF64X4rm addr:$src)>;
1461 def : Pat<(v16i32 (X86SubVBroadcast (loadv8i32 addr:$src))),
1462 (VBROADCASTI64X4rm addr:$src)>;
1463 def : Pat<(v32i16 (X86SubVBroadcast (loadv16i16 addr:$src))),
1464 (VBROADCASTI64X4rm addr:$src)>;
1465 def : Pat<(v64i8 (X86SubVBroadcast (loadv32i8 addr:$src))),
1466 (VBROADCASTI64X4rm addr:$src)>;
1468 // Provide fallback in case the load node that is used in the patterns above
1469 // is used by additional users, which prevents the pattern selection.
1470 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1471 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1472 (v4f64 VR256X:$src), 1)>;
1473 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1474 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1475 (v8f32 VR256X:$src), 1)>;
1476 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1477 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1478 (v4i64 VR256X:$src), 1)>;
1479 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1480 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1481 (v8i32 VR256X:$src), 1)>;
1482 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1483 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1484 (v16i16 VR256X:$src), 1)>;
1485 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1486 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1487 (v32i8 VR256X:$src), 1)>;
1489 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1490 (VBROADCASTF32X4rm addr:$src)>;
1491 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1492 (VBROADCASTI32X4rm addr:$src)>;
1493 def : Pat<(v32i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
1494 (VBROADCASTI32X4rm addr:$src)>;
1495 def : Pat<(v64i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
1496 (VBROADCASTI32X4rm addr:$src)>;
1498 // Patterns for selects of bitcasted operations.
1499 def : Pat<(vselect_mask VK16WM:$mask,
1500 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1501 (v16f32 immAllZerosV)),
1502 (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
1503 def : Pat<(vselect_mask VK16WM:$mask,
1504 (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
1506 (VBROADCASTF32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1507 def : Pat<(vselect_mask VK16WM:$mask,
1508 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1509 (v16i32 immAllZerosV)),
1510 (VBROADCASTI32X4rmkz VK16WM:$mask, addr:$src)>;
1511 def : Pat<(vselect_mask VK16WM:$mask,
1512 (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
1514 (VBROADCASTI32X4rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
1516 def : Pat<(vselect_mask VK8WM:$mask,
1517 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1518 (v8f64 immAllZerosV)),
1519 (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
1520 def : Pat<(vselect_mask VK8WM:$mask,
1521 (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
1523 (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
1524 def : Pat<(vselect_mask VK8WM:$mask,
1525 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1526 (v8i64 immAllZerosV)),
1527 (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
1528 def : Pat<(vselect_mask VK8WM:$mask,
1529 (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
1531 (VBROADCASTI64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
// 256-bit 32x4 subvector broadcasts require AVX512VL.
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
                                                      v8i32x_info, v4i32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
                                                      v8f32x_info, v4f32x_info>,
                           EVEX_V256, EVEX_CD8<32, CD8VT4>;

// Re-use the 32x4 broadcast for same-width loads of other element types.
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
          (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (loadv8i16 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (loadv16i8 addr:$src))),
          (VBROADCASTI32X4Z256rm addr:$src)>;

// Patterns for selects of bitcasted operations.
// NOTE(review): 'VR256X:$src0),' passthru lines restored (dropped by the
// numbered-listing extraction).
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        (v8f32 immAllZerosV)),
          (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        (v8i32 immAllZerosV)),
          (VBROADCASTI32X4Z256rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i32 (v4i64 (X86SubVBroadcast (loadv2i64 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;

// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
          (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v4i32 VR128X:$src), 1)>;
def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v8i16 VR128X:$src), 1)>;
def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
          (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (v16i8 VR128X:$src), 1)>;
}
// 256-bit 64x2 subvector broadcasts require AVX512VL + AVX512DQ.
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                         v4i64x_info, v2i64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                         v4f64x_info, v2f64x_info>, VEX_W1X,
                           EVEX_V256, EVEX_CD8<64, CD8VT2>;

// Patterns for selects of bitcasted operations.
// NOTE(review): 'VR256X:$src0),' passthru lines and closing brace restored.
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v4f64 immAllZerosV)),
          (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v4i64 immAllZerosV)),
          (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK4WM:$mask,
                        (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR256X:$src0),
          (VBROADCASTI64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
}
// 512-bit 64x2 / 32x8 subvector broadcasts require AVX512DQ.
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                                                     v8i64_info, v2i64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                                                     v16i32_info, v8i32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                                                     v8f64_info, v2f64x_info>, VEX_W,
                       EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                                                     v16f32_info, v8f32x_info>,
                       EVEX_V512, EVEX_CD8<32, CD8VT8>;

// Patterns for selects of bitcasted operations.
// NOTE(review): 'VR512:$src0),' passthru lines and closing brace restored.
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        (v16f32 immAllZerosV)),
          (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        (v16i32 immAllZerosV)),
          (VBROADCASTI32X8rmkz VK16WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK16WM:$mask,
                        (bc_v16i32 (v8i64 (X86SubVBroadcast (loadv4i64 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI32X8rmk VR512:$src0, VK16WM:$mask, addr:$src)>;

def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        (v8f64 immAllZerosV)),
          (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
def : Pat<(vselect_mask VK8WM:$mask,
                        (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
                        VR512:$src0),
          (VBROADCASTI64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
}
// 512/256-bit 32x2 broadcasts (two 32-bit elements); gated on AVX512DQ.
// NOTE(review): 'EVEX_V512;'/'EVEX_V256;' continuations and closing brace
// restored (dropped by the numbered-listing extraction).
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
                                        AVX512VLVectorVTInfo _Dst,
                                        AVX512VLVectorVTInfo _Src> {
  let Predicates = [HasDQI] in
    defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                       WriteShuffle256Ld, _Dst.info512,
                                       _Src.info512, _Src.info128, 0, null_frag, null_frag>,
                                       EVEX_V512;
  let Predicates = [HasDQI, HasVLX] in
    defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
                                          WriteShuffle256Ld, _Dst.info256,
                                          _Src.info256, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V256;
}
// Adds the 128-bit variant on top of avx512_common_broadcast_32x2.
// NOTE(review): 'EVEX_V128;' continuation and closing brace restored.
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
                                         AVX512VLVectorVTInfo _Dst,
                                         AVX512VLVectorVTInfo _Src> :
    avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {

  let Predicates = [HasDQI, HasVLX] in
    defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
                                          WriteShuffleXLd, _Dst.info128,
                                          _Src.info128, _Src.info128, 0, null_frag, null_frag>,
                                          EVEX_V128;
}
// Instantiations of the 32x2 broadcast multiclasses (integer and FP forms).
defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
                                                     avx512vl_i32_info, avx512vl_i64_info>;
defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
                                                    avx512vl_f32_info, avx512vl_f64_info>;
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST MASK TO VECTOR REGISTER
//---
// Register-only form that broadcasts a mask register into a vector register.
// NOTE(review): closing brace restored.
multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
                                  X86VectorVTInfo _, RegisterClass KRC> {
  def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
                      !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                      [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>,
                      EVEX, Sched<[WriteShuffle]>;
}
// Instantiates mask->vector broadcast at 512/256/128 bits, gated on CDI (+VLX).
// NOTE(review): closing braces restored.
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
                                 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
  let Predicates = [HasCDI] in
    defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
  let Predicates = [HasCDI, HasVLX] in {
    defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
    defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
  }
}
// vpbroadcastmw2d / vpbroadcastmb2q instantiations.
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
                                             avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
                                             avx512vl_i64_info, VK8>, VEX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
// Register/memory forms of the VPERMI2* 3-operand permute ($src1 is both the
// index operand and the tied destination).
// NOTE(review): 'let mayLoad = 1 in' and the closing braces restored.
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0 in {
  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  let mayLoad = 1 in
  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
          (ins _.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
                 (_.VT (_.LdFrag addr:$src3)))), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Broadcast-memory ("rmb") form of VPERMI2*.
// NOTE(review): closing brace restored.
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
      hasSideEffects = 0, mayLoad = 1 in
  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
            (ins _.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src2,
                   IdxVT.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// VPERMI2* at all three vector widths (128/256 gated on VLX).
// NOTE(review): closing braces restored.
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
// Byte/word VPERMI2 variants; no broadcast form, gated on a caller-supplied
// predicate (BWI or VBMI).
// NOTE(review): 'Predicate Prd> {' parameter line and closing braces restored.
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx,
                                  Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}
// VPERMI2 instantiations.
// NOTE(review): 'EVEX_CD8<8, CD8VF>;' continuation of VPERMI2B restored.
defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
// NOTE(review): '_.RC:$src3),' operand line in the first pattern and the
// closing brace restored.
multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
                                  X86VectorVTInfo IdxVT,
                                  X86VectorVTInfo CastVT> {
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 (_.VT _.RC:$src2),
                    (IdxVT.VT (bitconvert
                               (CastVT.VT _.RC:$src1))),
                    _.RC:$src3),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, _.RC:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert
                               (CastVT.VT _.RC:$src1))),
                    (_.LdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
  def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
                   (X86VPermt2 _.RC:$src2,
                    (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
                    (_.BroadcastLdFrag addr:$src3)),
                   (_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
            (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
             _.RC:$src2, addr:$src3)>;
}
// TODO: Should we add more casts? The vXi64 case is common due to ABI.
defm : avx512_perm_i_lowering<"VPERMI2PS", v16f32_info, v16i32_info, v8i64_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS256", v8f32x_info, v8i32x_info, v4i64x_info>;
defm : avx512_perm_i_lowering<"VPERMI2PS128", v4f32x_info, v4i32x_info, v2i64x_info>;
// VPERMT2* 3-operand permute; here $src1 is the table operand tied to $dst.
// NOTE(review): closing braces restored.
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
                         X86FoldableSchedWrite sched,
                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.RC:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched]>;

  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
          (ins IdxVT.RC:$src2, _.MemOp:$src3),
          OpcodeStr, "$src3, $src2", "$src2, $src3",
          (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
                 (_.LdFrag addr:$src3))), 1>,
          EVEX_4V, AVX5128IBase, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Broadcast-memory ("rmb") form of VPERMT2*.
// NOTE(review): closing brace restored.
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
                            X86FoldableSchedWrite sched,
                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
  let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
            (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
            OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
            !strconcat("$src2, ${src3}", _.BroadcastStr ),
            (_.VT (X86VPermt2 _.RC:$src1,
                   IdxVT.RC:$src2,(_.VT (_.BroadcastLdFrag addr:$src3)))), 1>,
            AVX5128IBase, EVEX_4V, EVEX_B,
            Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// VPERMT2* at all three vector widths (128/256 gated on VLX).
// NOTE(review): closing braces restored.
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
                               X86FoldableSchedWrite sched,
                               AVX512VLVectorVTInfo VTInfo,
                               AVX512VLVectorVTInfo ShuffleMask> {
  defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                           ShuffleMask.info512>,
             avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512,
                              ShuffleMask.info512>, EVEX_V512;
  let Predicates = [HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 ShuffleMask.info128>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128,
                                    ShuffleMask.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 ShuffleMask.info256>,
                   avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256,
                                    ShuffleMask.info256>, EVEX_V256;
  }
}
// Byte/word VPERMT2 variants; gated on a caller-supplied predicate.
// NOTE(review): closing braces restored.
multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched,
                                  AVX512VLVectorVTInfo VTInfo,
                                  AVX512VLVectorVTInfo Idx, Predicate Prd> {
  let Predicates = [Prd] in
    defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512,
                             Idx.info512>, EVEX_V512;
  let Predicates = [Prd, HasVLX] in {
    defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128,
                                 Idx.info128>, EVEX_V128;
    defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256,
                                 Idx.info256>, EVEX_V256;
  }
}
// VPERMT2 instantiations.
// NOTE(review): 'EVEX_CD8<8, CD8VF>;' continuation of VPERMT2B restored.
defm VPERMT2D  : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q  : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W  : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
                  VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B  : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
                  EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//===----------------------------------------------------------------------===//

// Masked blend instruction shells (no ISel patterns; hasSideEffects = 0).
// NOTE(review): the three closing braces were restored.
multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
                             X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
  def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>,
             EVEX_4V, Sched<[sched]>;
  def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
  let mayLoad = 1 in {
  def rm  : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
             (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr,
             "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
             []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
             Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
  }
  }
}
// Broadcast-memory forms of the masked blends.
// NOTE(review): closing braces restored.
multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
                                 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
  let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in {
  def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;

  def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
              "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;

  def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
              (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr,
              "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
              "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>,
              EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
              Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// 32/64-bit-element blends at all widths, with broadcast forms.
// NOTE(review): 'EVEX_V512;/EVEX_V256;/EVEX_V128;' continuations and closing
// braces restored.
multiclass blendmask_dq<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           WriteFVarBlendask_rmb<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
           EVEX_V512;

  let Predicates = [HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                WriteFVarBlendask_rmb<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}
// Byte/word blends (no broadcast forms), gated on BWI (+VLX).
// NOTE(review): 'EVEX_V512;/EVEX_V256;/EVEX_V128;' continuations and closing
// braces restored.
multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched,
                        AVX512VLVectorVTInfo VTInfo> {
  let Predicates = [HasBWI] in
    defm Z : WriteFVarBlendask<opc, OpcodeStr, sched.ZMM, VTInfo.info512>,
             EVEX_V512;

  let Predicates = [HasBWI, HasVLX] in {
    defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched.YMM, VTInfo.info256>,
                EVEX_V256;
    defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched.XMM, VTInfo.info128>,
                EVEX_V128;
  }
}
// Blend instantiations.
// NOTE(review): dropped 'avx512vl_*_info>;' continuation lines restored from
// the parallel VEX_W definitions below each one.
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
                              avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
                              avx512vl_f64_info>, VEX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
                              avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
                              avx512vl_i64_info>, VEX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
                              avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
                              avx512vl_i16_info>, VEX_W;
//===----------------------------------------------------------------------===//
// Compare Instructions
//===----------------------------------------------------------------------===//

// avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// NOTE(review): this multiclass lost many interior lines to the
// numbered-listing extraction ('(outs _.KRC:$dst),', the mnemonic lines,
// 'timm:$cc' closers, 'let mayLoad = 1 in', and the closing braces); they
// were reconstructed from the surviving lines and parallel defs — verify
// against the upstream file.
multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
                             PatFrag OpNode_su, PatFrag OpNodeSAE_su,
                             X86FoldableSchedWrite sched> {
  defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                               timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
  let mayLoad = 1 in
  defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
                    (outs _.KRC:$dst),
                    (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc),
                    "vcmp"#_.Suffix,
                    "$cc, $src2, $src1", "$src1, $src2, $cc",
                    (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                            timm:$cc),
                    (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2),
                               timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                    Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;

  // SAE form reads MXCSR only for the exception-suppression semantics.
  let Uses = [MXCSR] in
  defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
                     (outs _.KRC:$dst),
                     (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
                     "vcmp"#_.Suffix,
                     "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
                     (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                timm:$cc),
                     (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
                                   timm:$cc)>,
                     EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;

  let isCodeGenOnly = 1 in {
    let isCommutable = 1 in
    def rr : AVX512Ii8<0xC2, MRMSrcReg,
                (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          _.FRC:$src2,
                                          timm:$cc))]>,
                EVEX_4V, VEX_LIG, Sched<[sched]>, SIMD_EXC;
    def rm : AVX512Ii8<0xC2, MRMSrcMem,
                (outs _.KRC:$dst),
                (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
                !strconcat("vcmp", _.Suffix,
                           "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
                [(set _.KRC:$dst, (OpNode _.FRC:$src1,
                                          (_.ScalarLdFrag addr:$src2),
                                          timm:$cc))]>,
                EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
                Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
  }
}
// Single-use wrappers so masked compares only fold when the compare has one
// user. NOTE(review): '}]>;' terminators restored.
def X86cmpms_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                          (X86cmpms node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
def X86cmpmsSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
                             (X86cmpmsSAE node:$src1, node:$src2, node:$cc), [{
  return N->hasOneUse();
}]>;
// Scalar compares VCMPSS/VCMPSD. NOTE(review): closing brace restored.
let Predicates = [HasAVX512] in {
  let ExeDomain = SSEPackedSingle in
  defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XSIi8Base;
  let ExeDomain = SSEPackedDouble in
  defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
                                   X86cmpms_su, X86cmpmsSAE_su,
                                   SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
// Packed integer compare shells writing a mask register (no patterns here).
// NOTE(review): closing brace restored.
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
                              X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, bit IsCommutable> {
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rr : AVX512BI<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rm : AVX512BI<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             []>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = IsCommutable, hasSideEffects = 0 in
  def rrk : AVX512BI<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched]>;
  let mayLoad = 1, hasSideEffects = 0 in
  def rmk : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2}"),
              []>, EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Adds broadcast-memory forms on top of avx512_icmp_packed.
// NOTE(review): the 'bit IsCommutable> :' parameter line and the closing
// braces were restored.
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr,
                                  X86FoldableSchedWrite sched, X86VectorVTInfo _,
                                  bit IsCommutable> :
           avx512_icmp_packed<opc, OpcodeStr, sched, _, IsCommutable> {
  let mayLoad = 1, hasSideEffects = 0 in {
  def rmb : AVX512BI<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
              !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
                         "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
              []>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmbk : AVX512BI<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2),
               !strconcat(OpcodeStr,
                          "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
               []>, EVEX_4V, EVEX_K, EVEX_B,
               Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
}
// Instantiates the packed compare at 512/256/128 bits.
// NOTE(review): closing braces restored.
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr,
                                 X86SchedWriteWidths sched,
                                 AVX512VLVectorVTInfo VTInfo, Predicate prd,
                                 bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed<opc, OpcodeStr, sched.ZMM,
                              VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed<opc, OpcodeStr, sched.YMM,
                                   VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed<opc, OpcodeStr, sched.XMM,
                                   VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
// Same as avx512_icmp_packed_vl but with broadcast-memory forms.
// NOTE(review): closing braces restored.
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
                                     X86SchedWriteWidths sched,
                                     AVX512VLVectorVTInfo VTInfo,
                                     Predicate prd, bit IsCommutable = 0> {
  let Predicates = [prd] in
  defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.ZMM,
                                  VTInfo.info512, IsCommutable>, EVEX_V512;

  let Predicates = [prd, HasVLX] in {
    defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.YMM,
                                       VTInfo.info256, IsCommutable>, EVEX_V256;
    defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, sched.XMM,
                                       VTInfo.info128, IsCommutable>, EVEX_V128;
  }
}
// This fragment treats X86cmpm as commutable to help match loads in both
// operands for PCMPEQ.
def X86setcc_commute : SDNode<"ISD::SETCC", SDTSetCC, [SDNPCommutative]>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
                         (setcc node:$src1, node:$src2, SETGT)>;
// AddedComplexity is needed because the explicit SETEQ/SETGT CondCode doesn't
// increase the pattern complexity the way an immediate would.
// NOTE(review): closing brace of the 'let AddedComplexity' block restored.
let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
                      SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
                EVEX_CD8<8, CD8VF>, VEX_WIG;

defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
                      SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
                EVEX_CD8<16, CD8VF>, VEX_WIG;

defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
                      SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
                EVEX_CD8<32, CD8VF>;

defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
                      SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
                T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
}
// VPCMP{B,W,D,Q,UB,...} with an explicit condition-code immediate, plus
// commuted-load patterns.
// NOTE(review): several interior operand lines ('u8imm:$cc),',
// '(_.VT _.RC:$src2),', 'cond)))]>' closers) and the closing brace were
// dropped by the numbered-listing extraction and have been reconstructed —
// verify against the upstream file.
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, PatFrag Frag,
                          PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su,
                          X86FoldableSchedWrite sched,
                          X86VectorVTInfo _, string Name> {
  let isCommutable = 1 in
  def rri : AVX512AIi8<opc, MRMSrcReg,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT (Frag:$cc (_.VT _.RC:$src1),
                                                (_.VT _.RC:$src2),
                                                cond)))]>,
             EVEX_4V, Sched<[sched]>;
  def rmi : AVX512AIi8<opc, MRMSrcMem,
             (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
             !strconcat("vpcmp", Suffix,
                        "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
             [(set _.KRC:$dst, (_.KVT
                                (Frag:$cc
                                 (_.VT _.RC:$src1),
                                 (_.VT (_.LdFrag addr:$src2)),
                                 cond)))]>,
             EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
  let isCommutable = 1 in
  def rrik : AVX512AIi8<opc, MRMSrcReg,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT (Frag_su:$cc (_.VT _.RC:$src1),
                                                         (_.VT _.RC:$src2),
                                                         cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched]>;
  def rmik : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, $src2, $src1, $dst {${mask}}|",
                         "$dst {${mask}}, $src1, $src2, $cc}"),
              [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                     (_.KVT
                                      (Frag_su:$cc
                                       (_.VT _.RC:$src1),
                                       (_.VT (_.LdFrag addr:$src2)),
                                       cond))))]>,
              EVEX_4V, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;

  // Match a load in the first operand by commuting and transforming $cc.
  def : Pat<(_.KVT (CommFrag:$cc (_.LdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmi")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.LdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmik")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag.OperandTransform $cc))>;
}
// Broadcast-memory VPCMP-with-cc forms, plus commuted-broadcast patterns.
// NOTE(review): interior lines ('sched, _, Name> {', 'u8imm:$cc),',
// '(_.VT _.RC:$src1),', 'cond' closers) and the closing brace reconstructed —
// verify against the upstream file.
multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag,
                              PatFrag Frag_su, PatFrag CommFrag,
                              PatFrag CommFrag_su, X86FoldableSchedWrite sched,
                              X86VectorVTInfo _, string Name> :
           avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
                          sched, _, Name> {
  def rmib : AVX512AIi8<opc, MRMSrcMem,
              (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
                                      u8imm:$cc),
              !strconcat("vpcmp", Suffix,
                         "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
                         "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
              [(set _.KRC:$dst, (_.KVT (Frag:$cc
                                        (_.VT _.RC:$src1),
                                        (_.BroadcastLdFrag addr:$src2),
                                        cond)))]>,
              EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  def rmibk : AVX512AIi8<opc, MRMSrcMem,
               (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
                                       _.ScalarMemOp:$src2, u8imm:$cc),
               !strconcat("vpcmp", Suffix,
                          "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
                          "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
               [(set _.KRC:$dst, (and _.KRCWM:$mask,
                                      (_.KVT (Frag_su:$cc
                                              (_.VT _.RC:$src1),
                                              (_.BroadcastLdFrag addr:$src2),
                                              cond))))]>,
               EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;

  def : Pat<(_.KVT (CommFrag:$cc (_.BroadcastLdFrag addr:$src2),
                                 (_.VT _.RC:$src1), cond)),
            (!cast<Instruction>(Name#_.ZSuffix#"rmib")
             _.RC:$src1, addr:$src2, (CommFrag.OperandTransform $cc))>;

  def : Pat<(and _.KRCWM:$mask,
                 (_.KVT (CommFrag_su:$cc (_.BroadcastLdFrag addr:$src2),
                                         (_.VT _.RC:$src1), cond))),
            (!cast<Instruction>(Name#_.ZSuffix#"rmibk")
             _.KRCWM:$mask, _.RC:$src1, addr:$src2,
             (CommFrag_su.OperandTransform $cc))>;
}
2360 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag,
2361 PatFrag Frag_su, PatFrag CommFrag,
2362 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2363 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2364 let Predicates = [prd] in
2365 defm Z : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2366 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2368 let Predicates = [prd, HasVLX] in {
2369 defm Z256 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2370 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2371 defm Z128 : avx512_icmp_cc<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2372 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
2376 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, PatFrag Frag,
2377 PatFrag Frag_su, PatFrag CommFrag,
2378 PatFrag CommFrag_su, X86SchedWriteWidths sched,
2379 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2380 let Predicates = [prd] in
2381 defm Z : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2382 sched.ZMM, VTInfo.info512, NAME>, EVEX_V512;
2384 let Predicates = [prd, HasVLX] in {
2385 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2386 sched.YMM, VTInfo.info256, NAME>, EVEX_V256;
2387 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, Frag, Frag_su, CommFrag, CommFrag_su,
2388 sched.XMM, VTInfo.info128, NAME>, EVEX_V128;
// Transform a setcc condition code into the VPCMP immediate encoding.
2392 def X86pcmpm_imm : SDNodeXForm<setcc, [{
2393 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2394 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2395 return getI8Imm(SSECC, SDLoc(N));
2398 // Swapped operand version of the above.
2399 def X86pcmpm_imm_commute : SDNodeXForm<setcc, [{
2400 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2401 uint8_t SSECC = X86::getVPCMPImmForCond(CC);
2402 SSECC = X86::getSwappedVPCMPImm(SSECC);
2403 return getI8Imm(SSECC, SDLoc(N));
// Signed integer compare fragment: matches setcc with a signed condition.
2406 def X86pcmpm : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2407 (setcc node:$src1, node:$src2, node:$cc), [{
2408 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2409 return !ISD::isUnsignedIntSetCC(CC);
// "_su" (single use) variant — only matches when the setcc has one use, so
// masked-compare folding does not duplicate work.
2412 def X86pcmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2413 (setcc node:$src1, node:$src2, node:$cc), [{
2414 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2415 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2418 // Same as above, but commutes immediate. Use for load folding.
2419 def X86pcmpm_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2420 (setcc node:$src1, node:$src2, node:$cc), [{
2421 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2422 return !ISD::isUnsignedIntSetCC(CC);
2423 }], X86pcmpm_imm_commute>;
2425 def X86pcmpm_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2426 (setcc node:$src1, node:$src2, node:$cc), [{
2427 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2428 return N->hasOneUse() && !ISD::isUnsignedIntSetCC(CC);
2429 }], X86pcmpm_imm_commute>;
// Unsigned integer compare fragments (VPCMPU*): same structure as the
// signed set above, with the unsigned condition-code predicate.
2431 def X86pcmpum : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2432 (setcc node:$src1, node:$src2, node:$cc), [{
2433 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2434 return ISD::isUnsignedIntSetCC(CC);
2437 def X86pcmpum_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2438 (setcc node:$src1, node:$src2, node:$cc), [{
2439 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2440 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2443 // Same as above, but commutes immediate. Use for load folding.
2444 def X86pcmpum_commute : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2445 (setcc node:$src1, node:$src2, node:$cc), [{
2446 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2447 return ISD::isUnsignedIntSetCC(CC);
2448 }], X86pcmpm_imm_commute>;
2450 def X86pcmpum_commute_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2451 (setcc node:$src1, node:$src2, node:$cc), [{
2452 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2453 return N->hasOneUse() && ISD::isUnsignedIntSetCC(CC);
2454 }], X86pcmpm_imm_commute>;
2456 // FIXME: Is there a better scheduler class for VPCMP/VPCMPU?
// Byte/word element compares require BWI; dword/qword only need AVX512F and
// get the broadcast-capable (_rmb) variants.
2457 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86pcmpm, X86pcmpm_su,
2458 X86pcmpm_commute, X86pcmpm_commute_su,
2459 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2461 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
2462 X86pcmpum_commute, X86pcmpum_commute_su,
2463 SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
2466 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
2467 X86pcmpm_commute, X86pcmpm_commute_su,
2468 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2469 VEX_W, EVEX_CD8<16, CD8VF>;
2470 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
2471 X86pcmpum_commute, X86pcmpum_commute_su,
2472 SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
2473 VEX_W, EVEX_CD8<16, CD8VF>;
2475 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
2476 X86pcmpm_commute, X86pcmpm_commute_su,
2477 SchedWriteVecALU, avx512vl_i32_info,
2478 HasAVX512>, EVEX_CD8<32, CD8VF>;
2479 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
2480 X86pcmpum_commute, X86pcmpum_commute_su,
2481 SchedWriteVecALU, avx512vl_i32_info,
2482 HasAVX512>, EVEX_CD8<32, CD8VF>;
2484 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
2485 X86pcmpm_commute, X86pcmpm_commute_su,
2486 SchedWriteVecALU, avx512vl_i64_info,
2487 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2488 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
2489 X86pcmpum_commute, X86pcmpum_commute_su,
2490 SchedWriteVecALU, avx512vl_i64_info,
2491 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// Single-use fragments for FP compare-to-mask (VCMP): only fold into the
// masked forms when the compare node has one use.
2493 def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2494 (X86cmpm node:$src1, node:$src2, node:$cc), [{
2495 return N->hasOneUse();
2497 def X86cmpmSAE_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
2498 (X86cmpmSAE node:$src1, node:$src2, node:$cc), [{
2499 return N->hasOneUse();
// Commute a VCMP immediate (low 5 bits) for swapped-operand load folding.
2502 def X86cmpm_imm_commute : SDNodeXForm<timm, [{
2503 uint8_t Imm = X86::getSwappedVCMPImm(N->getZExtValue() & 0x1f);
2504 return getI8Imm(Imm, SDLoc(N));
// FP compare-to-mask (VCMPPS/VCMPPD): register, memory, and broadcast forms,
// each with masked variants, plus patterns folding a load/broadcast in the
// first operand via the commuted immediate. All forms may raise FP
// exceptions and read MXCSR.
2507 multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
2509 let Uses = [MXCSR], mayRaiseFPException = 1 in {
2510 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2511 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
2513 "$cc, $src2, $src1", "$src1, $src2, $cc",
2514 (X86any_cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2515 (X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2518 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2519 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2521 "$cc, $src2, $src1", "$src1, $src2, $cc",
2522 (X86any_cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2524 (X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
2526 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-from-memory form (EVEX.b).
2528 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2530 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2532 "$cc, ${src2}"#_.BroadcastStr#", $src1",
2533 "$src1, ${src2}"#_.BroadcastStr#", $cc",
2534 (X86any_cmpm (_.VT _.RC:$src1),
2535 (_.VT (_.BroadcastLdFrag addr:$src2)),
2537 (X86cmpm_su (_.VT _.RC:$src1),
2538 (_.VT (_.BroadcastLdFrag addr:$src2)),
2540 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2543 // Patterns for selecting with loads in other operand.
2544 def : Pat<(X86any_cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1),
2546 (!cast<Instruction>(Name#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2,
2547 (X86cmpm_imm_commute timm:$cc))>;
2549 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.LdFrag addr:$src2),
2552 (!cast<Instruction>(Name#_.ZSuffix#"rmik") _.KRCWM:$mask,
2553 _.RC:$src1, addr:$src2,
2554 (X86cmpm_imm_commute timm:$cc))>;
2556 def : Pat<(X86any_cmpm (_.BroadcastLdFrag addr:$src2),
2557 (_.VT _.RC:$src1), timm:$cc),
2558 (!cast<Instruction>(Name#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2,
2559 (X86cmpm_imm_commute timm:$cc))>;
2561 def : Pat<(and _.KRCWM:$mask, (X86cmpm_su (_.BroadcastLdFrag addr:$src2),
2564 (!cast<Instruction>(Name#_.ZSuffix#"rmbik") _.KRCWM:$mask,
2565 _.RC:$src1, addr:$src2,
2566 (X86cmpm_imm_commute timm:$cc))>;
// SAE (suppress-all-exceptions) register form; still reads MXCSR but does
// not set mayRaiseFPException.
2569 multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
2570 // comparison code form (VCMP[EQ/LT/LE/...]
2571 let Uses = [MXCSR] in
2572 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2573 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2575 "$cc, {sae}, $src2, $src1",
2576 "$src1, $src2, {sae}, $cc",
2577 (X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
2578 (X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
2580 EVEX_B, Sched<[sched]>;
// Instantiate at all widths; the SAE form exists only for ZMM.
2583 multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
2584 let Predicates = [HasAVX512] in {
2585 defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
2586 avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
2589 let Predicates = [HasAVX512,HasVLX] in {
2590 defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
2591 defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
2595 defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
2596 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2597 defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
2598 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2600 // Patterns to select fp compares with load as first operand.
2601 let Predicates = [HasAVX512] in {
2602 def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
2604 (VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2606 def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
2608 (VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
2611 // ----------------------------------------------------------------
// Single-use fragments for VFPCLASS so the masked forms only fold
// single-use nodes.
2614 def X86Vfpclasss_su : PatFrag<(ops node:$src1, node:$src2),
2615 (X86Vfpclasss node:$src1, node:$src2), [{
2616 return N->hasOneUse();
2619 def X86Vfpclass_su : PatFrag<(ops node:$src1, node:$src2),
2620 (X86Vfpclass node:$src1, node:$src2), [{
2621 return N->hasOneUse();
2624 //handle fpclass instruction mask = op(reg_scalar,imm)
2625 // op(mem_scalar,imm)
// Scalar VFPCLASSSS/VFPCLASSSD: classify one scalar element into a mask bit,
// with register/memory sources and optional writemask. Reads MXCSR.
2626 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
2627 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2629 let Predicates = [prd], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2630 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2631 (ins _.RC:$src1, i32u8imm:$src2),
2632 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2633 [(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
2634 (i32 timm:$src2)))]>,
2636 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2637 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2639 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2640 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2641 (X86Vfpclasss_su (_.VT _.RC:$src1),
2642 (i32 timm:$src2))))]>,
2643 EVEX_K, Sched<[sched]>;
2644 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2645 (ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
2647 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2649 (X86Vfpclasss (_.ScalarIntMemFrags addr:$src1),
2650 (i32 timm:$src2)))]>,
2651 Sched<[sched.Folded, sched.ReadAfterFold]>;
2652 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2653 (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
2655 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2656 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2657 (X86Vfpclasss_su (_.ScalarIntMemFrags addr:$src1),
2658 (i32 timm:$src2))))]>,
2659 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2663 //handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm)
2664 // fpclass(reg_vec, mem_vec, imm)
2665 // fpclass(reg_vec, broadcast(eltVt), imm)
// Vector VFPCLASSPS/VFPCLASSPD: classify each element into a mask bit.
// Forms: rr/rrk (register), rm/rmk (full-width memory), rmb/rmbk
// (broadcast, EVEX.b). `mem` is the x/y/z width suffix used by the
// AT&T-syntax InstAliases below to disambiguate memory operand size.
2666 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
2667 X86FoldableSchedWrite sched, X86VectorVTInfo _,
2669 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
2670 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2671 (ins _.RC:$src1, i32u8imm:$src2),
2672 OpcodeStr#_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2673 [(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
2674 (i32 timm:$src2)))]>,
2676 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2677 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2679 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2680 [(set _.KRC:$dst,(and _.KRCWM:$mask,
2681 (X86Vfpclass_su (_.VT _.RC:$src1),
2682 (i32 timm:$src2))))]>,
2683 EVEX_K, Sched<[sched]>;
2684 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2685 (ins _.MemOp:$src1, i32u8imm:$src2),
2686 OpcodeStr#_.Suffix#"{"#mem#"}"#
2687 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2688 [(set _.KRC:$dst,(X86Vfpclass
2689 (_.VT (_.LdFrag addr:$src1)),
2690 (i32 timm:$src2)))]>,
2691 Sched<[sched.Folded, sched.ReadAfterFold]>;
2692 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2693 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2694 OpcodeStr#_.Suffix#"{"#mem#"}"#
2695 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2696 [(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
2697 (_.VT (_.LdFrag addr:$src1)),
2698 (i32 timm:$src2))))]>,
2699 EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2700 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2701 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2702 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2703 _.BroadcastStr#", $dst|$dst, ${src1}"
2704 #_.BroadcastStr#", $src2}",
2705 [(set _.KRC:$dst,(X86Vfpclass
2706 (_.VT (_.BroadcastLdFrag addr:$src1)),
2707 (i32 timm:$src2)))]>,
2708 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
2709 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2710 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2711 OpcodeStr#_.Suffix#"\t{$src2, ${src1}"#
2712 _.BroadcastStr#", $dst {${mask}}|$dst {${mask}}, ${src1}"#
2713 _.BroadcastStr#", $src2}",
2714 [(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
2715 (_.VT (_.BroadcastLdFrag addr:$src1)),
2716 (i32 timm:$src2))))]>,
2717 EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
2720 // Allow registers or broadcast with the x, y, z suffix we use to disambiguate
2722 def : InstAlias<OpcodeStr#_.Suffix#mem#
2723 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2724 (!cast<Instruction>(NAME#"rr")
2725 _.KRC:$dst, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2726 def : InstAlias<OpcodeStr#_.Suffix#mem#
2727 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2728 (!cast<Instruction>(NAME#"rrk")
2729 _.KRC:$dst, _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), 0, "att">;
2730 def : InstAlias<OpcodeStr#_.Suffix#mem#
2731 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst|$dst, ${src1}"#
2732 _.BroadcastStr#", $src2}",
2733 (!cast<Instruction>(NAME#"rmb")
2734 _.KRC:$dst, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
2735 def : InstAlias<OpcodeStr#_.Suffix#mem#
2736 "\t{$src2, ${src1}"#_.BroadcastStr#", $dst {${mask}}|"
2737 "$dst {${mask}}, ${src1}"#_.BroadcastStr#", $src2}",
2738 (!cast<Instruction>(NAME#"rmbk")
2739 _.KRC:$dst, _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), 0, "att">;
// Instantiate vector fpclass at 512 (prd) and 128/256 (prd + VLX),
// passing z/x/y as the width suffix.
2742 multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
2743 bits<8> opc, X86SchedWriteWidths sched,
2745 let Predicates = [prd] in {
2746 defm Z : avx512_vector_fpclass<opc, OpcodeStr, sched.ZMM,
2747 _.info512, "z">, EVEX_V512;
2749 let Predicates = [prd, HasVLX] in {
2750 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, sched.XMM,
2751 _.info128, "x">, EVEX_V128;
2752 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, sched.YMM,
2753 _.info256, "y">, EVEX_V256;
// Combine vector (PS/PD) and scalar (SS/SD) fpclass under one name.
2757 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2758 bits<8> opcScalar, X86SchedWriteWidths sched,
2760 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2762 EVEX_CD8<32, CD8VF>;
2763 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2765 EVEX_CD8<64, CD8VF> , VEX_W;
2766 defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2767 sched.Scl, f32x_info, prd>, VEX_LIG,
2768 EVEX_CD8<32, CD8VT1>;
2769 defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
2770 sched.Scl, f64x_info, prd>, VEX_LIG,
2771 EVEX_CD8<64, CD8VT1>, VEX_W;
2774 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
2775 HasDQI>, AVX512AIi8Base, EVEX;
2777 //-----------------------------------------------------------------
2778 // Mask register copy, including
2779 // - copy between mask registers
2780 // - load/store mask registers
2781 // - copy from GPR to mask register and vice versa
// kk = mask->mask move, km = load, mk = store.
2783 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2784 string OpcodeStr, RegisterClass KRC,
2785 ValueType vvt, X86MemOperand x86memop> {
2786 let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2787 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2788 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2790 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2791 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2792 [(set KRC:$dst, (vvt (load addr:$src)))]>,
2794 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2795 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2796 [(store KRC:$src, addr:$dst)]>,
2797 Sched<[WriteStore]>;
// kr = GPR->mask, rk = mask->GPR. No patterns; selected via COPY_TO_REGCLASS.
2800 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2802 RegisterClass KRC, RegisterClass GRC> {
2803 let hasSideEffects = 0 in {
2804 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2805 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2807 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2808 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
// KMOVB needs DQI; KMOVW is baseline AVX512F; KMOVD/KMOVQ need BWI.
2813 let Predicates = [HasDQI] in
2814 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2815 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2818 let Predicates = [HasAVX512] in
2819 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2820 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2823 let Predicates = [HasBWI] in {
2824 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2826 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2828 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2830 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2834 // GR from/to mask register
// Bitconvert between narrow GPR integers and mask vectors goes through a
// 32-bit register (sub_16bit/sub_8bit subregister inserts/extracts).
2835 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2836 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2837 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2838 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
2840 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2841 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2842 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2843 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// zext of a mask->GPR move is free: KMOVWrk already zero-extends to 32 bits.
2845 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2846 (KMOVWrk VK16:$src)>;
2847 def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2848 (SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
2849 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2850 (COPY_TO_REGCLASS VK16:$src, GR32)>;
2851 def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2852 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
// KMOVBrk is a DQI instruction, hence the Requires<[HasDQI]> on zext forms.
2854 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2855 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2856 def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2857 (SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
2858 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2859 (COPY_TO_REGCLASS VK8:$src, GR32)>;
2860 def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2861 (INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
// 32/64-bit masks are the same width as the GPR, so a plain class copy works.
2863 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2864 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2865 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2866 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2867 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2868 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2869 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2870 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Sub-byte mask load/store via KMOVB when DQI is available.
2873 let Predicates = [HasDQI] in {
2874 def : Pat<(store VK1:$src, addr:$dst),
2875 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
2877 def : Pat<(v1i1 (load addr:$src)),
2878 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK1)>;
2879 def : Pat<(v2i1 (load addr:$src)),
2880 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2881 def : Pat<(v4i1 (load addr:$src)),
2882 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
// Without DQI, an 8-bit mask load goes through a zero-extending GPR load.
2885 let Predicates = [HasAVX512] in {
2886 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2887 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2888 def : Pat<(v16i1 (bitconvert (loadi16 addr:$src))),
2889 (KMOVWkm addr:$src)>;
// i8 extract of element 0 from an i1 vector.
2892 def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
2893 SDTypeProfile<1, 2, [SDTCisVT<0, i8>,
2894 SDTCVecEltisVT<1, i1>,
// Lower scalar<->mask copies and element-0 extracts for each mask width.
2897 let Predicates = [HasAVX512] in {
2898 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
2899 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2900 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
2902 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2903 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
2905 def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))),
2906 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
2908 def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))),
2909 (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>;
2912 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2913 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2914 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2915 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2916 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2917 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2918 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2920 def : Pat<(insert_subvector (v16i1 immAllZerosV),
2921 (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
2924 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
2928 // Mask unary operation
// One register-to-register mask unary op (e.g. KNOT) per mask width.
2930 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2931 RegisterClass KRC, SDPatternOperator OpNode,
2932 X86FoldableSchedWrite sched, Predicate prd> {
2933 let Predicates = [prd] in
2934 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2935 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2936 [(set KRC:$dst, (OpNode KRC:$src))]>,
// b/w/d/q variants: B needs DQI, W needs AVX512F, D/Q need BWI.
2940 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2941 SDPatternOperator OpNode,
2942 X86FoldableSchedWrite sched> {
2943 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2944 sched, HasDQI>, VEX, PD;
2945 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2946 sched, HasAVX512>, VEX, PS;
2947 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2948 sched, HasBWI>, VEX, PD, VEX_W;
2949 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2950 sched, HasBWI>, VEX, PS, VEX_W;
2953 // TODO - do we need a X86SchedWriteWidths::KMASK type?
2954 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>;
2956 // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
2957 let Predicates = [HasAVX512, NoDQI] in
2958 def : Pat<(vnot VK8:$src),
2959 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// Sub-byte mask complement: widen to VK16, complement with KNOTW, then copy
// the result back to the source's own narrow mask register class.
2961 def : Pat<(vnot VK4:$src),
2962 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2963 def : Pat<(vnot VK2:$src),
2964 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2965 def : Pat<(vnot VK1:$src),
// Bug fix: the v1i1 result must be copied back to VK1, not VK2 — the
// destination register class has to match the pattern's result type,
// mirroring the VK4/VK2 patterns above.
2966 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK1:$src, VK16)), VK1)>;
2968 // Mask binary operation
2969 // - KAND, KANDN, KOR, KXNOR, KXOR
// One two-operand mask instruction per width; commutativity is threaded
// through so the register allocator may swap operands.
2970 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2971 RegisterClass KRC, SDPatternOperator OpNode,
2972 X86FoldableSchedWrite sched, Predicate prd,
2974 let Predicates = [prd], isCommutable = IsCommutable in
2975 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2976 !strconcat(OpcodeStr,
2977 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
2978 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>,
// prdW lets the W form require something other than plain AVX512F (KADD).
2982 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2983 SDPatternOperator OpNode,
2984 X86FoldableSchedWrite sched, bit IsCommutable,
2985 Predicate prdW = HasAVX512> {
2986 defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2987 sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2988 defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2989 sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2990 defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2991 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2992 defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2993 sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
2996 // These nodes use 'vnot' instead of 'not' to support vectors.
2997 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2998 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
3000 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3001 defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>;
3002 defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>;
3003 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>;
3004 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>;
3005 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>;
3006 defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
// Select narrow-mask (VK1/VK2/VK4, and VK8 without DQI) binary ops by
// widening both operands to VK16, using the 16-bit instruction, and copying
// the result back to the narrow class.
3008 multiclass avx512_binop_pat<SDPatternOperator VOpNode,
3010 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
3011 // for the DQI set, this type is legal and KxxxB instruction is used
3012 let Predicates = [NoDQI] in
3013 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
3015 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
3016 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
3018 // All types smaller than 8 bits require conversion anyway
3019 def : Pat<(VOpNode VK1:$src1, VK1:$src2),
3020 (COPY_TO_REGCLASS (Inst
3021 (COPY_TO_REGCLASS VK1:$src1, VK16),
3022 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
3023 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
3024 (COPY_TO_REGCLASS (Inst
3025 (COPY_TO_REGCLASS VK2:$src1, VK16),
3026 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
3027 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
3028 (COPY_TO_REGCLASS (Inst
3029 (COPY_TO_REGCLASS VK4:$src1, VK16),
3030 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Instantiate for each logical op, always through the word-width (W) form.
3033 defm : avx512_binop_pat<and, KANDWrr>;
3034 defm : avx512_binop_pat<vandn, KANDNWrr>;
3035 defm : avx512_binop_pat<or, KORWrr>;
3036 defm : avx512_binop_pat<vxnor, KXNORWrr>;
3037 defm : avx512_binop_pat<xor, KXORWrr>;
// Mask unpack (KUNPCKBW/WD/DQ): concatenates two narrow masks into one of
// twice the width. No ISel pattern on the instruction itself; the
// concat_vectors Pat below selects it with the operands swapped so $src2
// lands in the low half.
3040 multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
3041 X86KVectorVTInfo Src, X86FoldableSchedWrite sched,
3043 let Predicates = [prd] in {
3044 let hasSideEffects = 0 in
3045 def rr : I<0x4b, MRMSrcReg, (outs Dst.KRC:$dst),
3046 (ins Src.KRC:$src1, Src.KRC:$src2),
3047 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
3048 VEX_4V, VEX_L, Sched<[sched]>;
3050 def : Pat<(Dst.KVT (concat_vectors Src.KRC:$src1, Src.KRC:$src2)),
3051 (!cast<Instruction>(NAME#rr) Src.KRC:$src2, Src.KRC:$src1)>;
3055 defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
3056 defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
3057 defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
// Mask test ops (KORTEST/KTEST): compare two mask registers and set EFLAGS;
// no mask result is written.
3060 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3061 SDNode OpNode, X86FoldableSchedWrite sched,
3063 let Predicates = [prd], Defs = [EFLAGS] in
3064 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
3065 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
3066 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>,
3070 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
3071 X86FoldableSchedWrite sched,
3072 Predicate prdW = HasAVX512> {
3073 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>,
3075 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
3077 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
3079 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
3083 // TODO - do we need a X86SchedWriteWidths::KMASK type?
3084 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>;
3085 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>;
// Mask shift-by-immediate (KSHIFTL/KSHIFTR). opc1 encodes the w/b forms,
// opc2 the q/d forms.
3088 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3089 SDNode OpNode, X86FoldableSchedWrite sched> {
3090 let Predicates = [HasAVX512] in
3091 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3092 !strconcat(OpcodeStr,
3093 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3094 [(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
3098 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3099 SDNode OpNode, X86FoldableSchedWrite sched> {
3100 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
3101 sched>, VEX, TAPD, VEX_W;
3102 let Predicates = [HasDQI] in
3103 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
3105 let Predicates = [HasBWI] in {
3106 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
3107 sched>, VEX, TAPD, VEX_W;
3108 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
3113 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>;
3114 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>;
3116 // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction.
// Widens narrow operands into a ZMM register via INSERT_SUBREG on an
// IMPLICIT_DEF, runs the 512-bit compare, and copies the mask result back to
// the narrow mask class. The upper-lane garbage is harmless because only the
// low mask bits are used.
// NOTE(review): the multiclass name carries a historical "axv512" typo; it
// cannot be renamed here without updating its users.
3117 multiclass axv512_icmp_packed_cc_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3119 X86VectorVTInfo Narrow,
3120 X86VectorVTInfo Wide> {
3121 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3122 (Narrow.VT Narrow.RC:$src2), cond)),
3124 (!cast<Instruction>(InstStr#"Zrri")
3125 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3126 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3127 (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Masked form: the narrow writemask is widened to the wide mask class first.
3129 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3130 (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3131 (Narrow.VT Narrow.RC:$src2),
3133 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3134 (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3135 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3136 (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3137 (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
// Broadcast-load ("rmb") variant of the no-VLX integer-compare lowering:
// one operand is a broadcast from memory ("Zrmib"/"Zrmibk" forms). The
// CommFrag/CommFrag_su fragments handle the commuted case where the
// broadcast appears as the FIRST operand, so the memory operand can still be
// folded into the instruction's second slot.
3140 multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su,
3141                                                      PatFrag CommFrag, PatFrag CommFrag_su,
3143                                                      X86VectorVTInfo Narrow,
3144                                                      X86VectorVTInfo Wide> {
// Unmasked, broadcast on the right.
3146 def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1),
3147                                 (Narrow.BroadcastLdFrag addr:$src2), cond)),
3149           (!cast<Instruction>(InstStr#"Zrmib")
3150             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3151             addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>;
// Masked, broadcast on the right.
3153 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3155                       (Frag_su:$cc (Narrow.VT Narrow.RC:$src1),
3156                                    (Narrow.BroadcastLdFrag addr:$src2),
3158           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3159             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3160             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3161             addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>;
3163 // Commuted with broadcast load.
3164 def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2),
3165                                     (Narrow.VT Narrow.RC:$src1),
3168           (!cast<Instruction>(InstStr#"Zrmib")
3169             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3170             addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>;
// Masked commuted-broadcast case.
3172 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3174                       (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2),
3175                                        (Narrow.VT Narrow.RC:$src1),
3177           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmibk")
3178             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3179             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3180             addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>;
3183 // Same as above, but for fp types which don't use PatFrags.
// Floating-point compares match the X86cmpm / X86cmpm_su nodes directly with
// a timm condition code; commuted broadcast cases remap the immediate via
// X86cmpm_imm_commute instead of a per-fragment OperandTransform.
3184 multiclass axv512_cmp_packed_cc_no_vlx_lowering<string InstStr,
3185                                                 X86VectorVTInfo Narrow,
3186                                                 X86VectorVTInfo Wide> {
// Unmasked register-register compare.
3187 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3188                                (Narrow.VT Narrow.RC:$src2), timm:$cc)),
3190           (!cast<Instruction>(InstStr#"Zrri")
3191             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3192             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3193             timm:$cc), Narrow.KRC)>;
// Masked register-register compare.
3195 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3196                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3197                                        (Narrow.VT Narrow.RC:$src2), timm:$cc))),
3198           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik")
3199             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3200             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3201             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)),
3202             timm:$cc), Narrow.KRC)>;
// Unmasked compare against a broadcast load ("Zrmbi").
3205 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1),
3206                                (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)),
3208           (!cast<Instruction>(InstStr#"Zrmbi")
3209             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3210             addr:$src2, timm:$cc), Narrow.KRC)>;
// Masked compare against a broadcast load.
3212 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3213                            (X86cmpm_su (Narrow.VT Narrow.RC:$src1),
3214                                        (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))),
3215           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3216             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3217             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3218             addr:$src2, timm:$cc), Narrow.KRC)>;
3220 // Commuted with broadcast load.
3221 def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3222                                (Narrow.VT Narrow.RC:$src1), timm:$cc)),
3224           (!cast<Instruction>(InstStr#"Zrmbi")
3225             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3226             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Masked commuted-broadcast case; condition code remapped for the swap.
3228 def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
3229                            (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)),
3230                                        (Narrow.VT Narrow.RC:$src1), timm:$cc))),
3231           (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrmbik")
3232             (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
3233             (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
3234             addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>;
// Instantiate the widening compare lowerings for targets with AVX512 but no
// VLX: 128/256-bit i32/i64 compares use the 512-bit VPCMP[U]D/VPCMP[U]Q, and
// 128/256-bit f32/f64 compares use 512-bit VCMPPS/VCMPPD.
3237 let Predicates = [HasAVX512, NoVLX] in {
3238   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v8i32x_info, v16i32_info>;
3239   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3241   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPD", v4i32x_info, v16i32_info>;
3242   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3244   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3245   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3247   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3248   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3250   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v8i32x_info, v16i32_info>;
3251   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v8i32x_info, v16i32_info>;
3253   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPD", v4i32x_info, v16i32_info>;
3254   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUD", v4i32x_info, v16i32_info>;
3256   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v4i64x_info, v8i64_info>;
3257   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v4i64x_info, v8i64_info>;
3259   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpm, X86pcmpm_su, X86pcmpm_commute, X86pcmpm_commute_su, "VPCMPQ", v2i64x_info, v8i64_info>;
3260   defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering<X86pcmpum, X86pcmpum_su, X86pcmpum_commute, X86pcmpum_commute_su, "VPCMPUQ", v2i64x_info, v8i64_info>;
3262   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>;
3263   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>;
3264   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>;
3265   defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>;
// Same widening trick for byte/word element compares, which need BWI for the
// 512-bit VPCMP[U]B/VPCMP[U]W instructions.
3268 let Predicates = [HasBWI, NoVLX] in {
3269   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v32i8x_info, v64i8_info>;
3270   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v32i8x_info, v64i8_info>;
3272   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPB", v16i8x_info, v64i8_info>;
3273   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUB", v16i8x_info, v64i8_info>;
3275   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v16i16x_info, v32i16_info>;
3276   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v16i16x_info, v32i16_info>;
3278   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpm, X86pcmpm_su, "VPCMPW", v8i16x_info, v32i16_info>;
3279   defm : axv512_icmp_packed_cc_no_vlx_lowering<X86pcmpum, X86pcmpum_su, "VPCMPUW", v8i16x_info, v32i16_info>;
3282 // Mask setting all 0s or 1s
// A pseudo (expanded post-RA) that materializes a constant all-zeros or
// all-ones mask; rematerializable and as cheap as a move, so the register
// allocator can recreate it instead of spilling.
3283 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3284   let Predicates = [HasAVX512] in
3285     let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
3286         SchedRW = [WriteZero] in
3287       def NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3288                     [(set KRC:$dst, (VT Val))]>;
// Instantiate the setop pseudo at 16/32/64-bit mask widths, then define the
// KSET0/KSET1 families (all-zeros / all-ones masks).
3291 multiclass avx512_mask_setop_w<PatFrag Val> {
3292   defm W : avx512_mask_setop<VK16, v16i1, Val>;
3293   defm D : avx512_mask_setop<VK32, v32i1, Val>;
3294   defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3297 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3298 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3300 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Sub-16-bit constant masks reuse the 16-bit KSET0W/KSET1W pseudos and copy
// the result down to the narrower mask register class.
3301 let Predicates = [HasAVX512] in {
3302   def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3303   def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3304   def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3305   def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3306   def : Pat<(v8i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK8)>;
3307   def : Pat<(v4i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK4)>;
3308   def : Pat<(v2i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK2)>;
3309   def : Pat<(v1i1 immAllOnesV),  (COPY_TO_REGCLASS (KSET1W), VK1)>;
3312 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// At index 0 both directions are pure register-class changes (no bit
// movement), so each pair lowers to a COPY_TO_REGCLASS.
3313 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3314                                              RegisterClass RC, ValueType VT> {
3315   def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3316             (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
3318   def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3319             (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate for every (smaller, larger) mask-width pair from v1i1 up to
// v64i1.
3321 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK2,  v2i1>;
3322 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK4,  v4i1>;
3323 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK8,  v8i1>;
3324 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK16, v16i1>;
3325 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK32, v32i1>;
3326 defm : operation_subvector_mask_lowering<VK1,  v1i1,  VK64, v64i1>;
3328 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK4,  v4i1>;
3329 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK8,  v8i1>;
3330 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK16, v16i1>;
3331 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK32, v32i1>;
3332 defm : operation_subvector_mask_lowering<VK2,  v2i1,  VK64, v64i1>;
3334 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK8,  v8i1>;
3335 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK16, v16i1>;
3336 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK32, v32i1>;
3337 defm : operation_subvector_mask_lowering<VK4,  v4i1,  VK64, v64i1>;
3339 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK16, v16i1>;
3340 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK32, v32i1>;
3341 defm : operation_subvector_mask_lowering<VK8,  v8i1,  VK64, v64i1>;
3343 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3344 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3346 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3348 //===----------------------------------------------------------------------===//
3349 // AVX-512 - Aligned and unaligned load and store
// Core vector-load multiclass: emits rr (reg move), rrk/rrkz (merge-/zero-
// masked reg move), rm (plain load), rmk/rmkz (merge-/zero-masked load), plus
// masked_load ("mload") selection patterns. NoRMPattern suppresses the plain
// rm pattern (used when another mnemonic is the canonical selection for that
// load); SelectOprr lets callers pass null_frag to suppress the reg-reg
// select patterns.
3352 multiclass avx512_load<bits<8> opc, string OpcodeStr, string Name,
3353                        X86VectorVTInfo _, PatFrag ld_frag, PatFrag mload,
3354                        X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3355                        bit NoRMPattern = 0,
3356                        SDPatternOperator SelectOprr = vselect> {
3357   let hasSideEffects = 0 in {
3358   let isMoveReg = 1 in
3359   def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3360                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3361                     _.ExeDomain>, EVEX, Sched<[Sched.RR]>,
3362                     EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
3363   def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3364                       (ins _.KRCWM:$mask,  _.RC:$src),
3365                       !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3366                        "${dst} {${mask}} {z}, $src}"),
3367                        [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3369                                            _.ImmAllZerosV)))], _.ExeDomain>,
3370                        EVEX, EVEX_KZ, Sched<[Sched.RR]>;
// Plain load: foldable and rematerializable.
3372   let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1 in
3373   def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3374                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3375                     !if(NoRMPattern, [],
3377                           (_.VT (ld_frag addr:$src)))]),
3378                     _.ExeDomain>, EVEX, Sched<[Sched.RM]>,
3379                     EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Merge-masked forms tie $src0 to $dst (the pass-through value).
3381   let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3382     def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3383                       (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3384                       !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3385                         "${dst} {${mask}}, $src1}"),
3386                       [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3388                                           (_.VT _.RC:$src0))))], _.ExeDomain>,
3389                        EVEX, EVEX_K, Sched<[Sched.RR]>;
3390     def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3391                      (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3392                      !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3393                       "${dst} {${mask}}, $src1}"),
3394                       [(set _.RC:$dst, (_.VT
3395                           (vselect_mask _.KRCWM:$mask,
3396                            (_.VT (ld_frag addr:$src1)),
3397                            (_.VT _.RC:$src0))))], _.ExeDomain>,
3398                      EVEX, EVEX_K, Sched<[Sched.RM]>;
3400   def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3401                     (ins _.KRCWM:$mask, _.MemOp:$src),
3402                     OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3403                     "${dst} {${mask}} {z}, $src}",
3404                     [(set _.RC:$dst, (_.VT (vselect_mask _.KRCWM:$mask,
3405                       (_.VT (ld_frag addr:$src)), _.ImmAllZerosV)))],
3406                     _.ExeDomain>, EVEX, EVEX_KZ, Sched<[Sched.RM]>;
// masked_load with undef or zero pass-through -> zero-masked load; with a
// register pass-through -> merge-masked load.
3408   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3409             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3411   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3412             (!cast<Instruction>(Name#_.ZSuffix#rmkz) _.KRCWM:$mask, addr:$ptr)>;
3414   def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3415             (!cast<Instruction>(Name#_.ZSuffix#rmk) _.RC:$src0,
3416              _.KRCWM:$mask, addr:$ptr)>;
// Vector-length wrapper for ALIGNED loads: Z (512-bit) under `prd`, and
// Z256/Z128 additionally under HasVLX. Uses AlignedLdFrag and
// masked_load_aligned.
3419 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3420                                  AVX512VLVectorVTInfo _, Predicate prd,
3421                                  X86SchedWriteMoveLSWidths Sched,
3422                                  string EVEX2VEXOvrd, bit NoRMPattern = 0> {
3423   let Predicates = [prd] in
3424   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512,
3425                        _.info512.AlignedLdFrag, masked_load_aligned,
3426                        Sched.ZMM, "", NoRMPattern>, EVEX_V512;
3428   let Predicates = [prd, HasVLX] in {
3429   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256,
3430                           _.info256.AlignedLdFrag, masked_load_aligned,
3431                           Sched.YMM, EVEX2VEXOvrd#"Y", NoRMPattern>, EVEX_V256;
3432   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128,
3433                           _.info128.AlignedLdFrag, masked_load_aligned,
3434                           Sched.XMM, EVEX2VEXOvrd, NoRMPattern>, EVEX_V128;
// Vector-length wrapper for UNALIGNED loads: same structure as the aligned
// version but uses LdFrag/masked_load and forwards SelectOprr.
3438 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3439                           AVX512VLVectorVTInfo _, Predicate prd,
3440                           X86SchedWriteMoveLSWidths Sched,
3441                           string EVEX2VEXOvrd, bit NoRMPattern = 0,
3442                           SDPatternOperator SelectOprr = vselect> {
3443   let Predicates = [prd] in
3444   defm Z : avx512_load<opc, OpcodeStr, NAME, _.info512, _.info512.LdFrag,
3445                        masked_load, Sched.ZMM, "",
3446                        NoRMPattern, SelectOprr>, EVEX_V512;
3448   let Predicates = [prd, HasVLX] in {
3449   defm Z256 : avx512_load<opc, OpcodeStr, NAME, _.info256, _.info256.LdFrag,
3450                           masked_load, Sched.YMM, EVEX2VEXOvrd#"Y",
3451                           NoRMPattern, SelectOprr>, EVEX_V256;
3452   defm Z128 : avx512_load<opc, OpcodeStr, NAME, _.info128, _.info128.LdFrag,
3453                           masked_load, Sched.XMM, EVEX2VEXOvrd,
3454                           NoRMPattern, SelectOprr>, EVEX_V128;
// Core vector-store multiclass: rr_REV/rrk_REV/rrkz_REV are the
// disassembly-only MRMDestReg encodings of the move (FoldGenData links them
// to the canonical load forms), mr is the plain store, mrk the masked store.
// Also emits the masked_store ("mstore") pattern and ".s" mnemonic aliases
// that force the store-form encoding.
3458 multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
3459                         X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore,
3460                         X86SchedWriteMoveLS Sched, string EVEX2VEXOvrd,
3461                         bit NoMRPattern = 0> {
3462   let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
3463   let isMoveReg = 1 in
3464   def rr_REV  : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3465                          OpcodeStr # "\t{$src, $dst|$dst, $src}",
3466                          [], _.ExeDomain>, EVEX,
3467                          FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
3468                          EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
3469   def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3470                          (ins _.KRCWM:$mask, _.RC:$src),
3471                          OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
3472                          "${dst} {${mask}}, $src}",
3473                          [], _.ExeDomain>, EVEX, EVEX_K,
3474                          FoldGenData<BaseName#_.ZSuffix#rrk>,
3476   def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3477                           (ins _.KRCWM:$mask, _.RC:$src),
3478                           OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
3479                           "${dst} {${mask}} {z}, $src}",
3480                           [], _.ExeDomain>, EVEX, EVEX_KZ,
3481                           FoldGenData<BaseName#_.ZSuffix#rrkz>,
// Plain store; pattern suppressed when NoMRPattern is set.
3485   let hasSideEffects = 0, mayStore = 1 in
3486   def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3487                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3488                     !if(NoMRPattern, [],
3489                         [(st_frag (_.VT _.RC:$src), addr:$dst)]),
3490                     _.ExeDomain>, EVEX, Sched<[Sched.MR]>,
3491                     EVEX2VEXOverride<EVEX2VEXOvrd#"mr">;
3492   def mrk : AVX512PI<opc, MRMDestMem, (outs),
3493                      (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3494               OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3495                [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
// masked_store lowers to the k-masked store form.
3498   def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
3499            (!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
3500                                                         _.KRCWM:$mask, _.RC:$src)>;
3502   def : InstAlias<OpcodeStr#".s\t{$src, $dst|$dst, $src}",
3503                   (!cast<Instruction>(BaseName#_.ZSuffix#"rr_REV")
3504                    _.RC:$dst, _.RC:$src), 0>;
3505   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3506                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrk_REV")
3507                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
3508   def : InstAlias<OpcodeStr#".s\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}",
3509                   (!cast<Instruction>(BaseName#_.ZSuffix#"rrkz_REV")
3510                    _.RC:$dst, _.KRCWM:$mask, _.RC:$src), 0>;
// Vector-length wrapper for UNALIGNED stores (store/masked_store fragments).
3513 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3514                             AVX512VLVectorVTInfo _, Predicate prd,
3515                             X86SchedWriteMoveLSWidths Sched,
3516                             string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3517   let Predicates = [prd] in
3518   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, store,
3519                         masked_store, Sched.ZMM, "",
3520                         NoMRPattern>, EVEX_V512;
3521   let Predicates = [prd, HasVLX] in {
3522     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, store,
3523                              masked_store, Sched.YMM,
3524                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3525     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, store,
3526                              masked_store, Sched.XMM, EVEX2VEXOvrd,
3527                              NoMRPattern>, EVEX_V128;
// Vector-length wrapper for ALIGNED stores (alignedstore /
// masked_store_aligned fragments).
3531 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3532                                   AVX512VLVectorVTInfo _,  Predicate prd,
3533                                   X86SchedWriteMoveLSWidths Sched,
3534                                   string EVEX2VEXOvrd, bit NoMRPattern = 0> {
3535   let Predicates = [prd] in
3536   defm Z : avx512_store<opc, OpcodeStr, NAME, _.info512, alignedstore,
3537                         masked_store_aligned, Sched.ZMM, "",
3538                         NoMRPattern>, EVEX_V512;
3540   let Predicates = [prd, HasVLX] in {
3541     defm Z256 : avx512_store<opc, OpcodeStr, NAME, _.info256, alignedstore,
3542                              masked_store_aligned, Sched.YMM,
3543                              EVEX2VEXOvrd#"Y", NoMRPattern>, EVEX_V256;
3544     defm Z128 : avx512_store<opc, OpcodeStr, NAME, _.info128, alignedstore,
3545                              masked_store_aligned, Sched.XMM, EVEX2VEXOvrd,
3546                              NoMRPattern>, EVEX_V128;
// Concrete move-instruction families. FP moves (VMOVAPS/APD/UPS/UPD) pass
// null_frag for SelectOprr on the unaligned forms; integer unaligned moves
// (VMOVDQU8/16/32) set NoRMPattern=1 so VMOVDQU64 stays the canonical
// selection for plain integer loads/stores (see the Pat blocks further down).
3550 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3551                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3552                avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3553                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPS">,
3554                                       PS, EVEX_CD8<32, CD8VF>;
3556 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3557                                      HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3558                avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3559                                       HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
3560                                       PD, VEX_W, EVEX_CD8<64, CD8VF>;
3562 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3563                               SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
3564                avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3565                                SchedWriteFMoveLS, "VMOVUPS">,
3566                                PS, EVEX_CD8<32, CD8VF>;
3568 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3569                               SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
3570                avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3571                                SchedWriteFMoveLS, "VMOVUPD">,
3572                PD, VEX_W, EVEX_CD8<64, CD8VF>;
3574 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3575                                        HasAVX512, SchedWriteVecMoveLS,
3577                  avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3578                                         HasAVX512, SchedWriteVecMoveLS,
3580                                         PD, EVEX_CD8<32, CD8VF>;
3582 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3583                                        HasAVX512, SchedWriteVecMoveLS,
3585                  avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3586                                         HasAVX512, SchedWriteVecMoveLS,
3588                                         PD, VEX_W, EVEX_CD8<64, CD8VF>;
3590 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3591                                SchedWriteVecMoveLS, "VMOVDQU", 1>,
3592                 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, HasBWI,
3593                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3594                                 XD, EVEX_CD8<8, CD8VF>;
3596 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3597                                 SchedWriteVecMoveLS, "VMOVDQU", 1>,
3598                  avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
3599                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3600                                  XD, VEX_W, EVEX_CD8<16, CD8VF>;
3602 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3603                                 SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
3604                  avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3605                                  SchedWriteVecMoveLS, "VMOVDQU", 1>,
3606                                  XS, EVEX_CD8<32, CD8VF>;
3608 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3609                                 SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
3610                  avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3611                                  SchedWriteVecMoveLS, "VMOVDQU">,
3612                                  XS, VEX_W, EVEX_CD8<64, CD8VF>;
3614 // Special instructions to help with spilling when we don't have VLX. We need
3615 // to load or store from a ZMM register instead. These are converted in
3616 // expandPostRAPseudos.
// Load-side pseudos: rematerializable/foldable like the real loads.
3617 let isReMaterializable = 1, canFoldAsLoad = 1,
3618     isPseudo = 1, mayLoad = 1, hasSideEffects = 0 in {
3619 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3620                             "", []>, Sched<[WriteFLoadX]>;
3621 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3622                             "", []>, Sched<[WriteFLoadY]>;
3623 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3624                             "", []>, Sched<[WriteFLoadX]>;
3625 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3626                             "", []>, Sched<[WriteFLoadY]>;
// Store-side pseudos.
3629 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3630 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3631                             "", []>, Sched<[WriteFStoreX]>;
3632 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3633                             "", []>, Sched<[WriteFStoreY]>;
3634 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3635                             "", []>, Sched<[WriteFStoreX]>;
3636 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3637                             "", []>, Sched<[WriteFStoreY]>;
// vselect with all-zeros on the TRUE side is a zeroing move under the
// INVERTED mask, so these patterns insert a KNOT. The v8i1 case also needs a
// round-trip through VK16 because KNOTW operates on 16-bit masks.
3640 def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
3641                   (v8i64 VR512:$src))),
3642    (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
3645 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3646                    (v16i32 VR512:$src))),
3647                   (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3649 // These patterns exist to prevent the above patterns from introducing a second
3650 // mask inversion when one already exists.
3651 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3652                           (v8i64 immAllZerosV),
3653                           (v8i64 VR512:$src))),
3654                  (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3655 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3656                            (v16i32 immAllZerosV),
3657                            (v16i32 VR512:$src))),
3658                   (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
// Lower a narrow masked select (blend) without VLX: widen both vector
// operands into ZMM, widen the mask, perform the 512-bit merge-masked
// ("rrk") or zero-masked ("rrkz") move, and extract the narrow result.
3660 multiclass mask_move_lowering<string InstrStr, X86VectorVTInfo Narrow,
3661                               X86VectorVTInfo Wide> {
// Merge case: $src0 is the pass-through value.
3662  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3663                                 Narrow.RC:$src1, Narrow.RC:$src0)),
3666                 (!cast<Instruction>(InstrStr#"rrk")
3667                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src0, Narrow.SubRegIdx)),
3668                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3669                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
// Zeroing case: false lanes become zero.
3672  def : Pat<(Narrow.VT (vselect (Narrow.KVT Narrow.KRCWM:$mask),
3673                                 Narrow.RC:$src1, Narrow.ImmAllZerosV)),
3676                 (!cast<Instruction>(InstrStr#"rrkz")
3677                  (COPY_TO_REGCLASS Narrow.KRCWM:$mask, Wide.KRCWM),
3678                  (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)))),
3682 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3683 // available. Use a 512-bit operation and extract.
3684 let Predicates = [HasAVX512, NoVLX] in {
3685   defm : mask_move_lowering<"VMOVAPSZ", v4f32x_info, v16f32_info>;
3686   defm : mask_move_lowering<"VMOVDQA32Z", v4i32x_info, v16i32_info>;
3687   defm : mask_move_lowering<"VMOVAPSZ", v8f32x_info, v16f32_info>;
3688   defm : mask_move_lowering<"VMOVDQA32Z", v8i32x_info, v16i32_info>;
3690   defm : mask_move_lowering<"VMOVAPDZ", v2f64x_info, v8f64_info>;
3691   defm : mask_move_lowering<"VMOVDQA64Z", v2i64x_info, v8i64_info>;
3692   defm : mask_move_lowering<"VMOVAPDZ", v4f64x_info, v8f64_info>;
3693   defm : mask_move_lowering<"VMOVDQA64Z", v4i64x_info, v8i64_info>;
// Byte/word element selects need BWI for the 512-bit VMOVDQU8/16 forms.
3696 let Predicates = [HasBWI, NoVLX] in {
3697   defm : mask_move_lowering<"VMOVDQU8Z", v16i8x_info, v64i8_info>;
3698   defm : mask_move_lowering<"VMOVDQU8Z", v32i8x_info, v64i8_info>;
3700   defm : mask_move_lowering<"VMOVDQU16Z", v8i16x_info, v32i16_info>;
3701   defm : mask_move_lowering<"VMOVDQU16Z", v16i16x_info, v32i16_info>;
// Canonicalize all 512-bit integer element types onto the 64-bit-element
// move: aligned -> VMOVDQA64Z, unaligned -> VMOVDQU64Z (loads and stores).
3704 let Predicates = [HasAVX512] in {
3706   def : Pat<(alignedloadv16i32 addr:$src),
3707             (VMOVDQA64Zrm addr:$src)>;
3708   def : Pat<(alignedloadv32i16 addr:$src),
3709             (VMOVDQA64Zrm addr:$src)>;
3710   def : Pat<(alignedloadv64i8 addr:$src),
3711             (VMOVDQA64Zrm addr:$src)>;
3712   def : Pat<(loadv16i32 addr:$src),
3713             (VMOVDQU64Zrm addr:$src)>;
3714   def : Pat<(loadv32i16 addr:$src),
3715             (VMOVDQU64Zrm addr:$src)>;
3716   def : Pat<(loadv64i8 addr:$src),
3717             (VMOVDQU64Zrm addr:$src)>;
3720   def : Pat<(alignedstore (v16i32 VR512:$src), addr:$dst),
3721             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3722   def : Pat<(alignedstore (v32i16 VR512:$src), addr:$dst),
3723             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3724   def : Pat<(alignedstore (v64i8 VR512:$src), addr:$dst),
3725             (VMOVDQA64Zmr addr:$dst, VR512:$src)>;
3726   def : Pat<(store (v16i32 VR512:$src), addr:$dst),
3727             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3728   def : Pat<(store (v32i16 VR512:$src), addr:$dst),
3729             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
3730   def : Pat<(store (v64i8 VR512:$src), addr:$dst),
3731             (VMOVDQU64Zmr addr:$dst, VR512:$src)>;
// Same canonicalization for the 128-bit (Z128) and 256-bit (Z256) forms when
// VLX is available.
3734 let Predicates = [HasVLX] in {
3736   def : Pat<(alignedloadv4i32 addr:$src),
3737             (VMOVDQA64Z128rm addr:$src)>;
3738   def : Pat<(alignedloadv8i16 addr:$src),
3739             (VMOVDQA64Z128rm addr:$src)>;
3740   def : Pat<(alignedloadv16i8 addr:$src),
3741             (VMOVDQA64Z128rm addr:$src)>;
3742   def : Pat<(loadv4i32 addr:$src),
3743             (VMOVDQU64Z128rm addr:$src)>;
3744   def : Pat<(loadv8i16 addr:$src),
3745             (VMOVDQU64Z128rm addr:$src)>;
3746   def : Pat<(loadv16i8 addr:$src),
3747             (VMOVDQU64Z128rm addr:$src)>;
3750   def : Pat<(alignedstore (v4i32 VR128X:$src), addr:$dst),
3751             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3752   def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3753             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3754   def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3755             (VMOVDQA64Z128mr addr:$dst, VR128X:$src)>;
3756   def : Pat<(store (v4i32 VR128X:$src), addr:$dst),
3757             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3758   def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3759             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3760   def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3761             (VMOVDQU64Z128mr addr:$dst, VR128X:$src)>;
3764   def : Pat<(alignedloadv8i32 addr:$src),
3765             (VMOVDQA64Z256rm addr:$src)>;
3766   def : Pat<(alignedloadv16i16 addr:$src),
3767             (VMOVDQA64Z256rm addr:$src)>;
3768   def : Pat<(alignedloadv32i8 addr:$src),
3769             (VMOVDQA64Z256rm addr:$src)>;
3770   def : Pat<(loadv8i32 addr:$src),
3771             (VMOVDQU64Z256rm addr:$src)>;
3772   def : Pat<(loadv16i16 addr:$src),
3773             (VMOVDQU64Z256rm addr:$src)>;
3774   def : Pat<(loadv32i8 addr:$src),
3775             (VMOVDQU64Z256rm addr:$src)>;
3778   def : Pat<(alignedstore (v8i32 VR256X:$src), addr:$dst),
3779             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3780   def : Pat<(alignedstore (v16i16 VR256X:$src), addr:$dst),
3781             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3782   def : Pat<(alignedstore (v32i8 VR256X:$src), addr:$dst),
3783             (VMOVDQA64Z256mr addr:$dst, VR256X:$src)>;
3784   def : Pat<(store (v8i32 VR256X:$src), addr:$dst),
3785             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3786   def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3787             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3788   def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3789             (VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
3792 // Move Int Doubleword to Packed Double Int
// GPR -> XMM moves: VMOVD (32-bit, scalar_to_vector into v4i32) and VMOVQ
// (64-bit, into v2i64), with register and memory source forms, plus the
// codegen-only GPR<->FR64X bitcast moves.
3794 let ExeDomain = SSEPackedInt in {
3795 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3796                       "vmovd\t{$src, $dst|$dst, $src}",
3798                         (v4i32 (scalar_to_vector GR32:$src)))]>,
3799                         EVEX, Sched<[WriteVecMoveFromGpr]>;
3800 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3801                       "vmovd\t{$src, $dst|$dst, $src}",
3803                         (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
3804                       EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecLoad]>;
3805 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3806                       "vmovq\t{$src, $dst|$dst, $src}",
3808                           (v2i64 (scalar_to_vector GR64:$src)))]>,
3809                       EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
// Disassembly-only memory form of the 64-bit move.
3810 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3811 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3813                       "vmovq\t{$src, $dst|$dst, $src}", []>,
3814                       EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
3815 let isCodeGenOnly = 1 in {
3816 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3817                        "vmovq\t{$src, $dst|$dst, $src}",
3818                        [(set FR64X:$dst, (bitconvert GR64:$src))]>,
3819                        EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3820 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3821                          "vmovq\t{$src, $dst|$dst, $src}",
3822                          [(set GR64:$dst, (bitconvert FR64X:$src))]>,
3823                          EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
3825 } // ExeDomain = SSEPackedInt
3827 // Move Int Doubleword to Single Scalar
// Codegen-only bitcast move GR32 -> FR32X (i32 reinterpreted as f32).
3829 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3830 def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3831                       "vmovd\t{$src, $dst|$dst, $src}",
3832                       [(set FR32X:$dst, (bitconvert GR32:$src))]>,
3833                       EVEX, Sched<[WriteVecMoveFromGpr]>;
3834 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3836 // Move doubleword from xmm register to r/m32
// Extract element 0 of a v4i32 into a GPR (rr) or store it to memory (mr).
3838 let ExeDomain = SSEPackedInt in {
3839 def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3840                        "vmovd\t{$src, $dst|$dst, $src}",
3841                        [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3843                        EVEX, Sched<[WriteVecMoveToGpr]>;
3844 def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
3845                        (ins i32mem:$dst, VR128X:$src),
3846                        "vmovd\t{$src, $dst|$dst, $src}",
3847                        [(store (i32 (extractelt (v4i32 VR128X:$src),
3848                                      (iPTR 0))), addr:$dst)]>,
3849                        EVEX, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecStore]>;
3850 } // ExeDomain = SSEPackedInt
3852 // Move quadword from xmm1 register to r/m64
// VMOVQ: move the low quadword of an XMM register to r/m64.
// NOTE(review): embedded numbers 3858, 3861, 3867, 3872, 3875, 3878 are
// missing from this chunk, so several defs below are visibly truncated.
3854 let ExeDomain = SSEPackedInt in {
3855 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3856 "vmovq\t{$src, $dst|$dst, $src}",
3857 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3859 PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
3860 Requires<[HasAVX512]>;
// 0x7E store form: disassembler-only (no pattern), 64-bit mode required.
3862 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3863 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3864 "vmovq\t{$src, $dst|$dst, $src}", []>, PD,
3865 EVEX, VEX_W, Sched<[WriteVecStore]>,
3866 Requires<[HasAVX512, In64BitMode]>;
// 0xD6 store form carries the real store pattern.
3868 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3869 (ins i64mem:$dst, VR128X:$src),
3870 "vmovq\t{$src, $dst|$dst, $src}",
3871 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3873 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3874 Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
// Register-register 0xD6 encoding, kept only so it can be disassembled.
3876 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
3877 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3879 "vmovq\t{$src, $dst|$dst, $src}", []>,
3880 EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
3881 } // ExeDomain = SSEPackedInt
// "vmovq.s" assembler alias selects the store-opcode (0xD6) rr encoding.
3883 def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
3884 (VMOVPQI2QIZrr VR128X:$dst, VR128X:$src), 0>;
// Fold an extract-low-64-and-store node into the vmovq store.
// NOTE(review): the closing brace of this predicate block (~line 3889) is
// missing from this chunk.
3886 let Predicates = [HasAVX512] in {
3887 def : Pat<(X86vextractstore64 (v2i64 VR128X:$src), addr:$dst),
3888 (VMOVPQI2QIZmr addr:$dst, VR128X:$src)>;
3891 // Move Scalar Single to Double Int
// FR32X -> GR32 bitcast move (vmovd), codegen-only.
// NOTE(review): line 3895 (the "(ins FR32X:$src)" operand list) is missing
// from this chunk.
3893 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3894 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3896 "vmovd\t{$src, $dst|$dst, $src}",
3897 [(set GR32:$dst, (bitconvert FR32X:$src))]>,
3898 EVEX, Sched<[WriteVecMoveToGpr]>;
3899 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3901 // Move Quadword Int to Packed Quadword Int
// vmovq load: i64 from memory zero-extended into a v2i64 XMM register.
// NOTE(review): lines 3905/3907 (ins list and pattern head) are missing
// from this chunk.
3903 let ExeDomain = SSEPackedInt in {
3904 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3906 "vmovq\t{$src, $dst|$dst, $src}",
3908 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3909 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
3910 } // ExeDomain = SSEPackedInt
3912 // Allow "vmovd" but print "vmovq".
// Assembler-only aliases: accept "vmovd" spelling for the 64-bit GPR<->XMM
// moves; the printed mnemonic stays "vmovq" (EmitPriority = 0).
3913 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3914 (VMOV64toPQIZrr VR128X:$dst, GR64:$src), 0>;
3915 def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}",
3916 (VMOVPQIto64Zrr GR64:$dst, VR128X:$src), 0>;
3918 // Conversions between masks and scalar fp.
// A v32i1/v64i1 mask and an f32/f64 value are bit-identical 32/64-bit
// payloads, so route the bitconvert through a GPR: FP reg -> GPR (VMOVSS2DI/
// VMOVSDto64) -> mask (KMOV), and the reverse for mask -> FP.
3919 def : Pat<(v32i1 (bitconvert FR32X:$src)),
3920 (KMOVDkr (VMOVSS2DIZrr FR32X:$src))>;
3921 def : Pat<(f32 (bitconvert VK32:$src)),
3922 (VMOVDI2SSZrr (KMOVDrk VK32:$src))>;
3924 def : Pat<(v64i1 (bitconvert FR64X:$src)),
3925 (KMOVQkr (VMOVSDto64Zrr FR64X:$src))>;
3926 def : Pat<(f64 (bitconvert VK64:$src)),
3927 (VMOV64toSDZrr (KMOVQrk VK64:$src))>;
3929 //===----------------------------------------------------------------------===//
3930 // AVX-512 MOVSS, MOVSD
3931 //===----------------------------------------------------------------------===//
// Template for the EVEX scalar-move family: plain, merge-masked (rrk/rmk),
// zero-masked (rrkz/rmkz) register and load forms, plus store (mr/mrk).
// NOTE(review): several embedded line numbers are missing (3947, 3969, 3982,
// 3992-3994), so a zero-vector operand, some braces, and the multiclass's
// closing brace are absent from this chunk.
3933 multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
3934 X86VectorVTInfo _> {
// rr form only selected under OptForSize; OptForSpeed prefers BLEND below.
3935 let Predicates = [HasAVX512, OptForSize] in
3936 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3937 (ins _.RC:$src1, _.RC:$src2),
3938 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3939 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
3940 _.ExeDomain>, EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
// Zero-masking: mask selects between the move result and zero.
3941 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3942 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3943 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3944 "$dst {${mask}} {z}, $src1, $src2}"),
3945 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3946 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3948 _.ExeDomain>, EVEX_4V, EVEX_KZ, Sched<[SchedWriteFShuffle.XMM]>;
// Merge-masking: $src0 is tied to $dst and supplies pass-through lanes.
3949 let Constraints = "$src0 = $dst" in
3950 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3951 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
3952 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3953 "$dst {${mask}}, $src1, $src2}"),
3954 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3955 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
3956 (_.VT _.RC:$src0))))],
3957 _.ExeDomain>, EVEX_4V, EVEX_K, Sched<[SchedWriteFShuffle.XMM]>;
// Loads are rematerializable and may be folded by the register allocator.
3958 let canFoldAsLoad = 1, isReMaterializable = 1 in {
3959 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst), (ins _.ScalarMemOp:$src),
3960 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3961 [(set _.RC:$dst, (_.VT (vzload_frag addr:$src)))],
3962 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
3963 // _alt version uses FR32/FR64 register class.
3964 let isCodeGenOnly = 1 in
3965 def rm_alt : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3966 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3967 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3968 _.ExeDomain>, EVEX, Sched<[WriteFLoad]>;
// Masked loads have no patterns; they are selected via the Pat<>s further
// down in this file.
3970 let mayLoad = 1, hasSideEffects = 0 in {
3971 let Constraints = "$src0 = $dst" in
3972 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3973 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3974 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3975 "$dst {${mask}}, $src}"),
3976 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFLoad]>;
3977 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3978 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3979 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3980 "$dst {${mask}} {z}, $src}"),
3981 [], _.ExeDomain>, EVEX, EVEX_KZ, Sched<[WriteFLoad]>;
// Scalar store; the masked store form (mrk) has no pattern.
3983 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3984 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3985 [(store _.FRC:$src, addr:$dst)], _.ExeDomain>,
3986 EVEX, Sched<[WriteFStore]>;
3987 let mayStore = 1, hasSideEffects = 0 in
3988 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3989 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
3990 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3991 [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
// Instantiate the scalar-move template for f32 (XS prefix) and f64
// (XD + VEX.W), with the matching disp8*N compressed-displacement scale.
3995 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
3996 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3998 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
3999 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Match (move scalar (select mask, s1, s2/zero)) onto the masked rrk/rrkz
// forms, copying the FR operands into the full vector class first.
// NOTE(review): lines 4012-4013 and 4022-4023 (the mask operand and one
// COPY_TO_REGCLASS per pattern) are missing from this chunk, plus the
// closing brace.
4002 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
4003 PatLeaf ZeroFP, X86VectorVTInfo _> {
// Merge form: select falls back to $src2, which becomes the tied $src0.
4005 def : Pat<(_.VT (OpNode _.RC:$src0,
4006 (_.VT (scalar_to_vector
4007 (_.EltVT (X86selects VK1WM:$mask,
4008 (_.EltVT _.FRC:$src1),
4009 (_.EltVT _.FRC:$src2))))))),
4010 (!cast<Instruction>(InstrStr#rrk)
4011 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, _.RC)),
4014 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Zero form: select falls back to +0.0, which maps onto zero-masking.
4016 def : Pat<(_.VT (OpNode _.RC:$src0,
4017 (_.VT (scalar_to_vector
4018 (_.EltVT (X86selects VK1WM:$mask,
4019 (_.EltVT _.FRC:$src1),
4020 (_.EltVT ZeroFP))))))),
4021 (!cast<Instruction>(InstrStr#rrkz)
4024 (_.VT (COPY_TO_REGCLASS _.FRC:$src1, _.RC)))>;
// Match a 512-bit masked store whose payload is a 128-bit vector widened
// with insert_subvector, and emit the scalar masked store (mrk), moving the
// scalar mask into VK1WM.  NOTE(review): the closing brace is missing from
// this chunk.
4027 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4028 dag Mask, RegisterClass MaskRC> {
4030 def : Pat<(masked_store
4031 (_.info512.VT (insert_subvector undef,
4032 (_.info128.VT _.info128.RC:$src),
4033 (iPTR 0))), addr:$dst, Mask),
4034 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4035 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4036 _.info128.RC:$src)>;
// Same as avx512_store_scalar_lowering, but the mask lives in a narrow GPR
// class (GR8/GR16) and must be widened to i32 via INSERT_SUBREG before the
// VK1WM copy.  NOTE(review): closing brace missing from this chunk.
4040 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
4041 AVX512VLVectorVTInfo _,
4042 dag Mask, RegisterClass MaskRC,
4043 SubRegIndex subreg> {
4045 def : Pat<(masked_store
4046 (_.info512.VT (insert_subvector undef,
4047 (_.info128.VT _.info128.RC:$src),
4048 (iPTR 0))), addr:$dst, Mask),
4049 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4050 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4051 _.info128.RC:$src)>;
4055 // This matches the more recent codegen from clang that avoids emitting a 512
4056 // bit masked store directly. Codegen will widen 128-bit masked store to 512
4057 // bits on AVX512F only targets.
// Two patterns: the widened-to-512 form (AVX512F-only targets) and the
// native 128-bit masked store (AVX512VL targets); both select the same
// scalar masked store.  NOTE(review): closing brace missing from this chunk.
4058 multiclass avx512_store_scalar_lowering_subreg2<string InstrStr,
4059 AVX512VLVectorVTInfo _,
4060 dag Mask512, dag Mask128,
4061 RegisterClass MaskRC,
4062 SubRegIndex subreg> {
4065 def : Pat<(masked_store
4066 (_.info512.VT (insert_subvector undef,
4067 (_.info128.VT _.info128.RC:$src),
4068 (iPTR 0))), addr:$dst, Mask512),
4069 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4070 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4071 _.info128.RC:$src)>;
4073 // AVX512VL pattern.
4074 def : Pat<(masked_store (_.info128.VT _.info128.RC:$src), addr:$dst, Mask128),
4075 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
4076 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4077 _.info128.RC:$src)>;
// Load-side counterpart of avx512_store_scalar_lowering: fold a widened
// masked_load with zero (rmkz) or vzmovl pass-through (rmk) into the scalar
// masked load.  NOTE(review): several lines (4086, 4089-4090, 4095-4096,
// 4099 and the closing brace) are missing from this chunk.
4080 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
4081 dag Mask, RegisterClass MaskRC> {
4083 def : Pat<(_.info128.VT (extract_subvector
4084 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4085 _.info512.ImmAllZerosV)),
4087 (!cast<Instruction>(InstrStr#rmkz)
4088 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
4091 def : Pat<(_.info128.VT (extract_subvector
4092 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4093 (_.info512.VT (insert_subvector undef,
4094 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4097 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4098 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// As avx512_load_scalar_lowering, but widens a narrow GPR mask to i32 via
// INSERT_SUBREG before copying into VK1WM.  NOTE(review): lines 4111,
// 4114-4115, 4120-4121, 4124-4126 and the closing brace are missing from
// this chunk.
4103 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
4104 AVX512VLVectorVTInfo _,
4105 dag Mask, RegisterClass MaskRC,
4106 SubRegIndex subreg> {
4108 def : Pat<(_.info128.VT (extract_subvector
4109 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4110 _.info512.ImmAllZerosV)),
4112 (!cast<Instruction>(InstrStr#rmkz)
4113 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4116 def : Pat<(_.info128.VT (extract_subvector
4117 (_.info512.VT (masked_load addr:$srcAddr, Mask,
4118 (_.info512.VT (insert_subvector undef,
4119 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4122 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4123 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4128 // This matches the more recent codegen from clang that avoids emitting a 512
4129 // bit masked load directly. Codegen will widen 128-bit masked load to 512
4130 // bits on AVX512F only targets.
// Four patterns: {widened-512, native-128} x {zero passthrough (rmkz),
// vzmovl passthrough (rmk)}.  NOTE(review): lines 4140, 4143-4144,
// 4149-4150, 4153-4154, 4160-4161, 4166-4167 and the closing brace are
// missing from this chunk.
4131 multiclass avx512_load_scalar_lowering_subreg2<string InstrStr,
4132 AVX512VLVectorVTInfo _,
4133 dag Mask512, dag Mask128,
4134 RegisterClass MaskRC,
4135 SubRegIndex subreg> {
4136 // AVX512F patterns.
4137 def : Pat<(_.info128.VT (extract_subvector
4138 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4139 _.info512.ImmAllZerosV)),
4141 (!cast<Instruction>(InstrStr#rmkz)
4142 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4145 def : Pat<(_.info128.VT (extract_subvector
4146 (_.info512.VT (masked_load addr:$srcAddr, Mask512,
4147 (_.info512.VT (insert_subvector undef,
4148 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
4151 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4152 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4155 // AVX512Vl patterns.
4156 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4157 _.info128.ImmAllZerosV)),
4158 (!cast<Instruction>(InstrStr#rmkz)
4159 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
4162 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
4163 (_.info128.VT (X86vzmovl _.info128.RC:$src)))),
4164 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
4165 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Anonymous instantiations of the move/store/load lowering multiclasses for
// VMOVSS (f32, i16/i32-shaped masks) and VMOVSD (f64, i8-shaped masks).
// The Mask dags spell out the exact mask shapes clang emits (trunc/and of a
// GPR, possibly bitcast and subvector-extracted).  NOTE(review): several
// embedded lines are missing in the subreg2 instantiations below (4184-4185,
// 4190-4193, 4197-4199, 4216-4217, 4222-4225, 4229-4231), so some dags are
// visibly truncated in this chunk.
4169 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
4170 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
4172 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4173 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4174 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4175 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4176 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4177 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4179 defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4180 (v16i1 (insert_subvector
4181 (v16i1 immAllZerosV),
4182 (v4i1 (extract_subvector
4183 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4186 (v4i1 (extract_subvector
4187 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4188 (iPTR 0))), GR8, sub_8bit>;
4189 defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4194 (v16i1 immAllZerosV),
4195 (v2i1 (extract_subvector
4196 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4200 (v2i1 (extract_subvector
4201 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4202 (iPTR 0))), GR8, sub_8bit>;
4204 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
4205 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
4206 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
4207 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
4208 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
4209 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
4211 defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
4212 (v16i1 (insert_subvector
4213 (v16i1 immAllZerosV),
4214 (v4i1 (extract_subvector
4215 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4218 (v4i1 (extract_subvector
4219 (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
4220 (iPTR 0))), GR8, sub_8bit>;
4221 defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
4226 (v16i1 immAllZerosV),
4227 (v2i1 (extract_subvector
4228 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4232 (v2i1 (extract_subvector
4233 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
4234 (iPTR 0))), GR8, sub_8bit>;
// Select masked scalar f32/f64 selects onto VMOVSS/VMOVSD rrk/rrkz/rmk/rmkz,
// bouncing the FR operands through VR128X via COPY_TO_REGCLASS.
// NOTE(review): lines 4247 and 4265 (the wrapping "(COPY_TO_REGCLASS" of the
// two rmk patterns) and 4250/4268 (their closing ", FR..X)>;") are missing
// from this chunk, so those two patterns are visibly truncated.
4236 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
4237 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
4238 (v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
4239 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4240 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4242 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)),
4243 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
4244 (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>;
4246 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))),
4248 (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)),
4249 VK1WM:$mask, addr:$src)),
4251 def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)),
4252 (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>;
4254 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
4255 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrk
4256 (v2f64 (COPY_TO_REGCLASS FR64X:$src2, VR128X)),
4257 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4258 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4260 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)),
4261 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
4262 (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>;
4264 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))),
4266 (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)),
4267 VK1WM:$mask, addr:$src)),
4269 def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)),
4270 (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>;
// Whole-v4f32/v2f64 selects: lane 0 comes from the masked move, lanes 1..N
// pass $src1 through by repeating it as both the merge source and operand.
4273 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 VR128X:$src2))),
4274 (VMOVSSZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4275 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 VR128X:$src2))),
4276 (VMOVSDZrrk VR128X:$src2, VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4278 def : Pat<(v4f32 (X86selects VK1WM:$mask, (v4f32 VR128X:$src1), (v4f32 immAllZerosV))),
4279 (VMOVSSZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
4280 def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZerosV))),
4281 (VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
// Reversed-encoding (opcode 0x11, MRMDestReg) forms of VMOVSS/VMOVSD.
// Disassembler-only; FoldGenData ties each to its forward twin so the
// unfolding tables stay in sync.  NOTE(review): line 4328 (one ins operand)
// and the block's closing brace (~4334) are missing from this chunk.
4283 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
4284 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4285 (ins VR128X:$src1, VR128X:$src2),
4286 "vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4287 []>, XS, EVEX_4V, VEX_LIG,
4288 FoldGenData<"VMOVSSZrr">,
4289 Sched<[SchedWriteFShuffle.XMM]>;
4291 let Constraints = "$src0 = $dst" in
4292 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4293 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
4294 VR128X:$src1, VR128X:$src2),
4295 "vmovss\t{$src2, $src1, $dst {${mask}}|"#
4296 "$dst {${mask}}, $src1, $src2}",
4297 []>, EVEX_K, XS, EVEX_4V, VEX_LIG,
4298 FoldGenData<"VMOVSSZrrk">,
4299 Sched<[SchedWriteFShuffle.XMM]>;
4301 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4302 (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
4303 "vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
4304 "$dst {${mask}} {z}, $src1, $src2}",
4305 []>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
4306 FoldGenData<"VMOVSSZrrkz">,
4307 Sched<[SchedWriteFShuffle.XMM]>;
4309 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4310 (ins VR128X:$src1, VR128X:$src2),
4311 "vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4312 []>, XD, EVEX_4V, VEX_LIG, VEX_W,
4313 FoldGenData<"VMOVSDZrr">,
4314 Sched<[SchedWriteFShuffle.XMM]>;
4316 let Constraints = "$src0 = $dst" in
4317 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4318 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
4319 VR128X:$src1, VR128X:$src2),
4320 "vmovsd\t{$src2, $src1, $dst {${mask}}|"#
4321 "$dst {${mask}}, $src1, $src2}",
4322 []>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4323 VEX_W, FoldGenData<"VMOVSDZrrk">,
4324 Sched<[SchedWriteFShuffle.XMM]>;
4326 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4327 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4329 "vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
4330 "$dst {${mask}} {z}, $src1, $src2}",
4331 []>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4332 VEX_W, FoldGenData<"VMOVSDZrrkz">,
4333 Sched<[SchedWriteFShuffle.XMM]>;
// Assembler aliases: the ".s" suffix requests the store-form (reversed)
// encodings defined above; EmitPriority 0 keeps them print-suppressed.
4336 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4337 (VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4338 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
4339 "$dst {${mask}}, $src1, $src2}",
4340 (VMOVSSZrrk_REV VR128X:$dst, VK1WM:$mask,
4341 VR128X:$src1, VR128X:$src2), 0>;
4342 def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4343 "$dst {${mask}} {z}, $src1, $src2}",
4344 (VMOVSSZrrkz_REV VR128X:$dst, VK1WM:$mask,
4345 VR128X:$src1, VR128X:$src2), 0>;
4346 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4347 (VMOVSDZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
4348 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4349 "$dst {${mask}}, $src1, $src2}",
4350 (VMOVSDZrrk_REV VR128X:$dst, VK1WM:$mask,
4351 VR128X:$src1, VR128X:$src2), 0>;
4352 def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4353 "$dst {${mask}} {z}, $src1, $src2}",
4354 (VMOVSDZrrkz_REV VR128X:$dst, VK1WM:$mask,
4355 VR128X:$src1, VR128X:$src2), 0>;
// OptForSize: implement "zero all but lane 0" (X86vzmovl) with VMOVSS from
// a zeroed register; for 256/512-bit types operate on the low xmm and
// re-insert with SUBREG_TO_REG.  NOTE(review): this predicate block's
// closing brace (~4381) is missing from this chunk.
4357 let Predicates = [HasAVX512, OptForSize] in {
4358 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4359 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
4360 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4361 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
4363 // Move low f32 and clear high bits.
4364 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4365 (SUBREG_TO_REG (i32 0),
4366 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4367 (v4f32 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4368 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4369 (SUBREG_TO_REG (i32 0),
4370 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4371 (v4i32 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)))), sub_xmm)>;
4373 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4374 (SUBREG_TO_REG (i32 0),
4375 (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4376 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)))), sub_xmm)>;
4377 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4378 (SUBREG_TO_REG (i32 0),
4379 (v4i32 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4380 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)))), sub_xmm)>;
4383 // Use 128-bit blends for OptForSpeed since BLENDs have better throughput than
4384 // VMOVSS/SD. Unfortunately, loses the ability to use XMM16-31.
// NOTE(review): this predicate block's closing brace (~4396) is missing
// from this chunk.
4385 let Predicates = [HasAVX512, OptForSpeed] in {
4386 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4387 (SUBREG_TO_REG (i32 0),
4388 (v4f32 (VBLENDPSrri (v4f32 (V_SET0)),
4389 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)),
4390 (i8 1))), sub_xmm)>;
4391 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4392 (SUBREG_TO_REG (i32 0),
4393 (v4i32 (VPBLENDWrri (v4i32 (V_SET0)),
4394 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)),
4395 (i8 3))), sub_xmm)>;
// Fold scalar loads into VMOVSS/VMOVSD; the widened (256/512-bit) vzload
// forms reuse the 128-bit load and re-insert with SUBREG_TO_REG.
// NOTE(review): closing brace (~4417) missing from this chunk.
4398 let Predicates = [HasAVX512] in {
4399 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4400 (VMOVSSZrm addr:$src)>;
4401 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4402 (VMOVSDZrm addr:$src)>;
4404 // Represent the same patterns above but in the form they appear for
4406 def : Pat<(v8f32 (X86vzload32 addr:$src)),
4407 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4408 def : Pat<(v4f64 (X86vzload64 addr:$src)),
4409 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4411 // Represent the same patterns above but in the form they appear for
4413 def : Pat<(v16f32 (X86vzload32 addr:$src)),
4414 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4415 def : Pat<(v8f64 (X86vzload64 addr:$src)),
4416 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// vmovq xmm,xmm: move low quadword and zero the upper one (X86vzmovl).
// NOTE(review): lines 4421 (ins list) and 4425-4427 (trailing attributes
// and closing brace) are missing from this chunk.
4419 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
4420 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4422 "vmovq\t{$src, $dst|$dst, $src}",
4423 [(set VR128X:$dst, (v2i64 (X86vzmovl
4424 (v2i64 VR128X:$src))))]>,
// Zero-extending scalar-to-vector moves and zero-extending loads, plus
// X86vzmovl on wide types lowered through the low xmm.  NOTE(review):
// embedded lines 4457, 4462-4463, 4468, 4473-4475 (several "sub_xmm)>;"
// tails and the closing brace) are missing from this chunk.
4428 let Predicates = [HasAVX512] in {
4429 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4430 (VMOVDI2PDIZrr GR32:$src)>;
4432 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4433 (VMOV64toPQIZrr GR64:$src)>;
4435 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4436 def : Pat<(v4i32 (X86vzload32 addr:$src)),
4437 (VMOVDI2PDIZrm addr:$src)>;
4438 def : Pat<(v8i32 (X86vzload32 addr:$src)),
4439 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4440 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4441 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4442 def : Pat<(v2i64 (X86vzload64 addr:$src)),
4443 (VMOVQI2PQIZrm addr:$src)>;
4444 def : Pat<(v4i64 (X86vzload64 addr:$src)),
4445 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4447 // Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
4448 def : Pat<(v16i32 (X86vzload32 addr:$src)),
4449 (SUBREG_TO_REG (i32 0), (v4i32 (VMOVDI2PDIZrm addr:$src)), sub_xmm)>;
4450 def : Pat<(v8i64 (X86vzload64 addr:$src)),
4451 (SUBREG_TO_REG (i64 0), (v2i64 (VMOVQI2PQIZrm addr:$src)), sub_xmm)>;
4453 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4454 (SUBREG_TO_REG (i32 0),
4455 (v2f64 (VMOVZPQILo2PQIZrr
4456 (v2f64 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)))),
4458 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4459 (SUBREG_TO_REG (i32 0),
4460 (v2i64 (VMOVZPQILo2PQIZrr
4461 (v2i64 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)))),
4464 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4465 (SUBREG_TO_REG (i32 0),
4466 (v2f64 (VMOVZPQILo2PQIZrr
4467 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)))),
4469 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4470 (SUBREG_TO_REG (i32 0),
4471 (v2i64 (VMOVZPQILo2PQIZrr
4472 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)))),
4476 //===----------------------------------------------------------------------===//
4477 // AVX-512 - Non-temporals
4478 //===----------------------------------------------------------------------===//
// 512-bit non-temporal (streaming) load; patterns are attached separately
// below via Pat<>s on alignednontemporalload.
4480 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4481 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4482 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.ZMM.RM]>,
4483 EVEX, T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>;
// 256/128-bit non-temporal loads, gated on AVX512VL.  NOTE(review): lines
// 4487/4493 (the ins operand lists) and the closing brace are missing from
// this chunk.
4485 let Predicates = [HasVLX] in {
4486 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4488 "vmovntdqa\t{$src, $dst|$dst, $src}",
4489 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.YMM.RM]>,
4490 EVEX, T8PD, EVEX_V256, EVEX_CD8<64, CD8VF>;
4492 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4494 "vmovntdqa\t{$src, $dst|$dst, $src}",
4495 [], SSEPackedInt>, Sched<[SchedWriteVecMoveLS.XMM.RM]>,
4496 EVEX, T8PD, EVEX_V128, EVEX_CD8<64, CD8VF>;
// One non-temporal store (mr) per vector width; high AddedComplexity so the
// NT pattern wins over the ordinary store.  NOTE(review): closing brace
// missing from this chunk.
4499 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4500 X86SchedWriteMoveLS Sched,
4501 PatFrag st_frag = alignednontemporalstore> {
4502 let SchedRW = [Sched.MR], AddedComplexity = 400 in
4503 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4504 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4505 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4506 _.ExeDomain>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiate avx512_movnt at 512-bit (AVX512F) and 256/128-bit (VLX).
// NOTE(review): closing braces missing from this chunk.
4509 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4510 AVX512VLVectorVTInfo VTInfo,
4511 X86SchedWriteMoveLSWidths Sched> {
4512 let Predicates = [HasAVX512] in
4513 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512, Sched.ZMM>, EVEX_V512;
4515 let Predicates = [HasAVX512, HasVLX] in {
4516 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256, Sched.YMM>, EVEX_V256;
4517 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128, Sched.XMM>, EVEX_V128;
// Non-temporal stores: integer (PD), f64 (PD + VEX.W) and f32 (PS) flavors.
4521 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
4522 SchedWriteVecMoveLSNT>, PD;
4523 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
4524 SchedWriteFMoveLSNT>, PD, VEX_W;
4525 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
4526 SchedWriteFMoveLSNT>, PS;
// Route every 512-bit element type through the single VMOVNTDQ store and
// VMOVNTDQA load encodings.  NOTE(review): closing brace (~4548) missing
// from this chunk.
4528 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4529 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4530 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4531 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4532 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4533 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4534 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4536 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4537 (VMOVNTDQAZrm addr:$src)>;
4538 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4539 (VMOVNTDQAZrm addr:$src)>;
4540 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4541 (VMOVNTDQAZrm addr:$src)>;
4542 def : Pat<(v16i32 (alignednontemporalload addr:$src)),
4543 (VMOVNTDQAZrm addr:$src)>;
4544 def : Pat<(v32i16 (alignednontemporalload addr:$src)),
4545 (VMOVNTDQAZrm addr:$src)>;
4546 def : Pat<(v64i8 (alignednontemporalload addr:$src)),
4547 (VMOVNTDQAZrm addr:$src)>;
// Same routing for 256- and 128-bit types under AVX512VL.  NOTE(review):
// closing brace (~4590) missing from this chunk.
4550 let Predicates = [HasVLX], AddedComplexity = 400 in {
4551 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4552 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4553 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4554 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4555 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4556 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4558 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4559 (VMOVNTDQAZ256rm addr:$src)>;
4560 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4561 (VMOVNTDQAZ256rm addr:$src)>;
4562 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4563 (VMOVNTDQAZ256rm addr:$src)>;
4564 def : Pat<(v8i32 (alignednontemporalload addr:$src)),
4565 (VMOVNTDQAZ256rm addr:$src)>;
4566 def : Pat<(v16i16 (alignednontemporalload addr:$src)),
4567 (VMOVNTDQAZ256rm addr:$src)>;
4568 def : Pat<(v32i8 (alignednontemporalload addr:$src)),
4569 (VMOVNTDQAZ256rm addr:$src)>;
4571 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4572 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4573 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4574 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4575 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4576 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4578 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4579 (VMOVNTDQAZ128rm addr:$src)>;
4580 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4581 (VMOVNTDQAZ128rm addr:$src)>;
4582 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4583 (VMOVNTDQAZ128rm addr:$src)>;
4584 def : Pat<(v4i32 (alignednontemporalload addr:$src)),
4585 (VMOVNTDQAZ128rm addr:$src)>;
4586 def : Pat<(v8i16 (alignednontemporalload addr:$src)),
4587 (VMOVNTDQAZ128rm addr:$src)>;
4588 def : Pat<(v16i8 (alignednontemporalload addr:$src)),
4589 (VMOVNTDQAZ128rm addr:$src)>;
4592 //===----------------------------------------------------------------------===//
4593 // AVX-512 - Integer arithmetic
// Base template: maskable integer binop, register-register (rr) and
// register-memory (rm) forms.  NOTE(review): lines 4603-4604 (the rr form's
// Sched<[sched]> tail) are missing from this chunk.
4595 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4596 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4597 bit IsCommutable = 0> {
4598 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4599 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4600 "$src2, $src1", "$src1, $src2",
4601 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4602 IsCommutable, IsCommutable>, AVX512BIBase, EVEX_4V,
4605 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4606 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4607 "$src2, $src1", "$src1, $src2",
4608 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2)))>,
4609 AVX512BIBase, EVEX_4V,
4610 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends avx512_binop_rm with the broadcast-from-memory (rmb, EVEX.b)
// form.  NOTE(review): closing brace (~4625) missing from this chunk.
4613 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4614 X86VectorVTInfo _, X86FoldableSchedWrite sched,
4615 bit IsCommutable = 0> :
4616 avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> {
4617 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4618 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4619 "${src2}"#_.BroadcastStr#", $src1",
4620 "$src1, ${src2}"#_.BroadcastStr,
4621 (_.VT (OpNode _.RC:$src1,
4622 (_.BroadcastLdFrag addr:$src2)))>,
4623 AVX512BIBase, EVEX_4V, EVEX_B,
4624 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate avx512_binop_rm at 512 (prd) and 256/128 (prd + VLX) widths.
// NOTE(review): closing braces missing from this chunk.
4627 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4628 AVX512VLVectorVTInfo VTInfo,
4629 X86SchedWriteWidths sched, Predicate prd,
4630 bit IsCommutable = 0> {
4631 let Predicates = [prd] in
4632 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4633 IsCommutable>, EVEX_V512;
4635 let Predicates = [prd, HasVLX] in {
4636 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256,
4637 sched.YMM, IsCommutable>, EVEX_V256;
4638 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128,
4639 sched.XMM, IsCommutable>, EVEX_V128;
// Same width fan-out as avx512_binop_rm_vl, but with the broadcast form.
// NOTE(review): closing braces missing from this chunk.
4643 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4644 AVX512VLVectorVTInfo VTInfo,
4645 X86SchedWriteWidths sched, Predicate prd,
4646 bit IsCommutable = 0> {
4647 let Predicates = [prd] in
4648 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched.ZMM,
4649 IsCommutable>, EVEX_V512;
4651 let Predicates = [prd, HasVLX] in {
4652 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4653 sched.YMM, IsCommutable>, EVEX_V256;
4654 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4655 sched.XMM, IsCommutable>, EVEX_V128;
// i64-element flavor: broadcast form allowed (rmb_vl), REX.W/EVEX.W set via
// VEX_W, 8-byte compressed-displacement scaling via EVEX_CD8<64, CD8VF>.
4659 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4660 X86SchedWriteWidths sched, Predicate prd,
4661 bit IsCommutable = 0> {
4662 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4663 sched, prd, IsCommutable>,
4664 VEX_W, EVEX_CD8<64, CD8VF>;
// i32-element flavor: broadcast form allowed, 4-byte disp8 scaling.
4667 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4668 X86SchedWriteWidths sched, Predicate prd,
4669 bit IsCommutable = 0> {
4670 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4671 sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// i16-element flavor: no broadcast form (uses avx512_binop_rm_vl, not rmb_vl)
// since EVEX embedded broadcast only exists for 32/64-bit elements.
4674 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4675 X86SchedWriteWidths sched, Predicate prd,
4676 bit IsCommutable = 0> {
4677 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4678 sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
// i8-element flavor: no broadcast form, same reasoning as the i16 variant.
4682 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4683 X86SchedWriteWidths sched, Predicate prd,
4684 bit IsCommutable = 0> {
4685 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4686 sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
// Emits both the d (i32) and q (i64) element-size families, appending the
// "d"/"q" mnemonic suffix to OpcodeStr.
4690 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4691 SDNode OpNode, X86SchedWriteWidths sched,
4692 Predicate prd, bit IsCommutable = 0> {
4693 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd,
4696 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd,
// Emits both the b (i8) and w (i16) element-size families, appending the
// "b"/"w" mnemonic suffix to OpcodeStr.
4700 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4701 SDNode OpNode, X86SchedWriteWidths sched,
4702 Predicate prd, bit IsCommutable = 0> {
4703 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd,
4706 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd,
// Emits all four element sizes: d/q gated on HasAVX512, b/w gated on HasBWI
// (byte/word element ops require the BWI extension).
4710 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4711 bits<8> opc_d, bits<8> opc_q,
4712 string OpcodeStr, SDNode OpNode,
4713 X86SchedWriteWidths sched,
4714 bit IsCommutable = 0> {
4715 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4716 sched, HasAVX512, IsCommutable>,
4717 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4718 sched, HasBWI, IsCommutable>;
// Binary op whose source and destination vector types differ (_Src vs _Dst),
// with a separate broadcast type (_Brdct) for the rmb form — the broadcast
// load is bitconverted from _Brdct.VT before feeding OpNode.
4721 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr,
4722 X86FoldableSchedWrite sched,
4723 SDNode OpNode,X86VectorVTInfo _Src,
4724 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4725 bit IsCommutable = 0> {
4726 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4727 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4728 "$src2, $src1","$src1, $src2",
4730 (_Src.VT _Src.RC:$src1),
4731 (_Src.VT _Src.RC:$src2))),
4733 AVX512BIBase, EVEX_4V, Sched<[sched]>;
4734 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4735 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4736 "$src2, $src1", "$src1, $src2",
4737 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4738 (_Src.LdFrag addr:$src2)))>,
4739 AVX512BIBase, EVEX_4V,
4740 Sched<[sched.Folded, sched.ReadAfterFold]>;
4742 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4743 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4745 "${src2}"#_Brdct.BroadcastStr#", $src1",
4746 "$src1, ${src2}"#_Brdct.BroadcastStr,
4747 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4748 (_Brdct.VT (_Brdct.BroadcastLdFrag addr:$src2)))))>,
4749 AVX512BIBase, EVEX_4V, EVEX_B,
4750 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Integer add/sub (wrapping and saturating), averages, and the multiply
// family. IsCommutable=1 where the underlying SDNode is commutative
// (add, saturating adds, mul variants); sub/ssubsat/usubsat are not.
4753 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4754 SchedWriteVecALU, 1>;
4755 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4756 SchedWriteVecALU, 0>;
4757 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
4758 SchedWriteVecALU, HasBWI, 1>;
4759 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
4760 SchedWriteVecALU, HasBWI, 0>;
4761 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
4762 SchedWriteVecALU, HasBWI, 1>;
4763 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
4764 SchedWriteVecALU, HasBWI, 0>;
4765 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4766 SchedWritePMULLD, HasAVX512, 1>, T8PD;
4767 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4768 SchedWriteVecIMul, HasBWI, 1>;
// VPMULLQ is DQI-only and has no VEX equivalent, hence
// NotEVEX2VEXConvertible.
4769 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4770 SchedWriteVecIMul, HasDQI, 1>, T8PD,
4771 NotEVEX2VEXConvertible;
4772 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SchedWriteVecIMul,
4774 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SchedWriteVecIMul,
4776 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs,
4777 SchedWriteVecIMul, HasBWI, 1>, T8PD;
4778 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4779 SchedWriteVecALU, HasBWI, 1>;
4780 defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq,
4781 SchedWriteVecIMul, HasAVX512, 1>, T8PD;
4782 defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq,
4783 SchedWriteVecIMul, HasAVX512, 1>;
// Length-expands avx512_binop_rm2 over 512/256/128 bits with distinct
// source/destination VT info per width; the broadcast type is always the
// qword vector of that width (v8i64/v4i64/v2i64).
4785 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
4786 X86SchedWriteWidths sched,
4787 AVX512VLVectorVTInfo _SrcVTInfo,
4788 AVX512VLVectorVTInfo _DstVTInfo,
4789 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4790 let Predicates = [prd] in
4791 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
4792 _SrcVTInfo.info512, _DstVTInfo.info512,
4793 v8i64_info, IsCommutable>,
4794 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4795 let Predicates = [HasVLX, prd] in {
4796 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
4797 _SrcVTInfo.info256, _DstVTInfo.info256,
4798 v4i64x_info, IsCommutable>,
4799 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4800 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
4801 _SrcVTInfo.info128, _DstVTInfo.info128,
4802 v2i64x_info, IsCommutable>,
4803 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// VPMULTISHIFTQB (VBMI): i8 source/destination vectors, qword broadcast type
// (supplied inside avx512_binop_all).
4807 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SchedWriteVecALU,
4808 avx512vl_i8_info, avx512vl_i8_info,
4809 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory form for pack-style ops: broadcast has the SOURCE element
// type (scaled by EVEX_CD8<_Src.EltSize>), result has the destination type.
4811 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4812 X86VectorVTInfo _Src, X86VectorVTInfo _Dst,
4813 X86FoldableSchedWrite sched> {
4814 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4815 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4817 "${src2}"#_Src.BroadcastStr#", $src1",
4818 "$src1, ${src2}"#_Src.BroadcastStr,
4819 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4820 (_Src.VT (_Src.BroadcastLdFrag addr:$src2)))))>,
4821 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>,
4822 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Register/memory forms for pack-style ops with distinct source and
// destination vector types (e.g. i32 inputs packed to i16 results).
4825 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4826 SDNode OpNode,X86VectorVTInfo _Src,
4827 X86VectorVTInfo _Dst, X86FoldableSchedWrite sched,
4828 bit IsCommutable = 0> {
4829 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4830 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4831 "$src2, $src1","$src1, $src2",
4833 (_Src.VT _Src.RC:$src1),
4834 (_Src.VT _Src.RC:$src2))),
4835 IsCommutable, IsCommutable>,
4836 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>;
4837 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4838 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4839 "$src2, $src1", "$src1, $src2",
4840 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4841 (_Src.LdFrag addr:$src2)))>,
4842 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>,
4843 Sched<[sched.Folded, sched.ReadAfterFold]>;
// i32 -> i16 pack instructions at all widths, gated on HasBWI (plus HasVLX
// for sub-512). Dword-element source allows the broadcast (rmb) form.
4846 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4848 let Predicates = [HasBWI] in
4849 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4850 v32i16_info, SchedWriteShuffle.ZMM>,
4851 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4852 v32i16_info, SchedWriteShuffle.ZMM>, EVEX_V512;
4853 let Predicates = [HasBWI, HasVLX] in {
4854 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4855 v16i16x_info, SchedWriteShuffle.YMM>,
4856 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4857 v16i16x_info, SchedWriteShuffle.YMM>,
4859 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4860 v8i16x_info, SchedWriteShuffle.XMM>,
4861 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4862 v8i16x_info, SchedWriteShuffle.XMM>,
// i16 -> i8 pack instructions at all widths. Only rr/rm forms: word-element
// sources have no EVEX embedded-broadcast encoding, so no rmb here.
4866 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4868 let Predicates = [HasBWI] in
4869 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
4870 SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
4871 let Predicates = [HasBWI, HasVLX] in {
4872 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4873 v32i8x_info, SchedWriteShuffle.YMM>,
4875 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4876 v16i8x_info, SchedWriteShuffle.XMM>,
// VPMADD* style ops (narrow-element inputs, wider-element results) at all
// widths, gated on HasBWI / HasBWI+HasVLX.
4881 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4882 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4883 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4884 let Predicates = [HasBWI] in
4885 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4886 _Dst.info512, SchedWriteVecIMul.ZMM,
4887 IsCommutable>, EVEX_V512;
4888 let Predicates = [HasBWI, HasVLX] in {
4889 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4890 _Dst.info256, SchedWriteVecIMul.YMM,
4891 IsCommutable>, EVEX_V256;
4892 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4893 _Dst.info128, SchedWriteVecIMul.XMM,
4894 IsCommutable>, EVEX_V128;
// Pack instructions (signed/unsigned saturation) and the multiply-add pair.
// VPMADDWD is commutative (IsCommutable = 1); VPMADDUBSW is not, since its
// operands have different signedness.
4898 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4899 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4900 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4901 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
4903 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4904 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
4905 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4906 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
// Signed/unsigned min/max for all element sizes. b/w variants require BWI;
// d/q require AVX512. The q variants are new in AVX-512 (no VEX form), so
// they are marked NotEVEX2VEXConvertible.
4908 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4909 SchedWriteVecALU, HasBWI, 1>, T8PD;
4910 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4911 SchedWriteVecALU, HasBWI, 1>;
4912 defm VPMAXSD : avx512_binop_rm_vl_d<0x3D, "vpmaxsd", smax,
4913 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4914 defm VPMAXSQ : avx512_binop_rm_vl_q<0x3D, "vpmaxsq", smax,
4915 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4916 NotEVEX2VEXConvertible;
4918 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4919 SchedWriteVecALU, HasBWI, 1>;
4920 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4921 SchedWriteVecALU, HasBWI, 1>, T8PD;
4922 defm VPMAXUD : avx512_binop_rm_vl_d<0x3F, "vpmaxud", umax,
4923 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4924 defm VPMAXUQ : avx512_binop_rm_vl_q<0x3F, "vpmaxuq", umax,
4925 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4926 NotEVEX2VEXConvertible;
4928 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4929 SchedWriteVecALU, HasBWI, 1>, T8PD;
4930 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4931 SchedWriteVecALU, HasBWI, 1>;
4932 defm VPMINSD : avx512_binop_rm_vl_d<0x39, "vpminsd", smin,
4933 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4934 defm VPMINSQ : avx512_binop_rm_vl_q<0x39, "vpminsq", smin,
4935 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4936 NotEVEX2VEXConvertible;
4938 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4939 SchedWriteVecALU, HasBWI, 1>;
4940 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4941 SchedWriteVecALU, HasBWI, 1>, T8PD;
4942 defm VPMINUD : avx512_binop_rm_vl_d<0x3B, "vpminud", umin,
4943 SchedWriteVecALU, HasAVX512, 1>, T8PD;
4944 defm VPMINUQ : avx512_binop_rm_vl_q<0x3B, "vpminuq", umin,
4945 SchedWriteVecALU, HasAVX512, 1>, T8PD,
4946 NotEVEX2VEXConvertible;
4948 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Each pattern widens the 128/256-bit operands into a ZMM register via
// INSERT_SUBREG over IMPLICIT_DEF, runs the 512-bit instruction, and
// (in the elided tail) extracts the low subregister of the result.
4949 let Predicates = [HasDQI, NoVLX] in {
4950 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4953 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4954 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4956 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4959 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4963 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4966 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4967 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4969 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4972 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Lowers 128/256-bit i64 min/max to the 512-bit instruction (named by
// 'Instr') when VLX is unavailable, using the same widen-through-ZMM
// technique as the PMULLQ patterns above. Covers rr and rmb (broadcast)
// forms at both widths.
4977 multiclass avx512_min_max_lowering<string Instr, SDNode OpNode> {
4978 def : Pat<(v4i64 (OpNode VR256X:$src1, VR256X:$src2)),
4980 (!cast<Instruction>(Instr#"rr")
4981 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4982 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4984 def : Pat<(v4i64 (OpNode (v4i64 VR256X:$src1), (v4i64 (X86VBroadcastld64 addr:$src2)))),
4986 (!cast<Instruction>(Instr#"rmb")
4987 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4991 def : Pat<(v2i64 (OpNode VR128X:$src1, VR128X:$src2)),
4993 (!cast<Instruction>(Instr#"rr")
4994 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4995 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4997 def : Pat<(v2i64 (OpNode (v2i64 VR128X:$src1), (v2i64 (X86VBroadcastld64 addr:$src2)))),
4999 (!cast<Instruction>(Instr#"rmb")
5000 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
// Instantiate the NoVLX min/max lowering for all four qword min/max ops.
5005 let Predicates = [HasAVX512, NoVLX] in {
5006 defm : avx512_min_max_lowering<"VPMAXUQZ", umax>;
5007 defm : avx512_min_max_lowering<"VPMINUQZ", umin>;
5008 defm : avx512_min_max_lowering<"VPMAXSQZ", smax>;
5009 defm : avx512_min_max_lowering<"VPMINSQZ", smin>;
5012 //===----------------------------------------------------------------------===//
5013 // AVX-512 Logical Instructions
5014 //===----------------------------------------------------------------------===//
5016 defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
5017 SchedWriteVecLogic, HasAVX512, 1>;
5018 defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
5019 SchedWriteVecLogic, HasAVX512, 1>;
5020 defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
5021 SchedWriteVecLogic, HasAVX512, 1>;
5022 defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
5023 SchedWriteVecLogic, HasAVX512>;
// Select byte/word-typed logical ops onto the qword-element VL instructions:
// bitwise and/or/xor/andn are element-size agnostic, and only d/q forms are
// encoded, so v16i8/v8i16 (128-bit) and v32i8/v16i16 (256-bit) map to *QZ128
// / *QZ256.
5025 let Predicates = [HasVLX] in {
5026 def : Pat<(v16i8 (and VR128X:$src1, VR128X:$src2)),
5027 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5028 def : Pat<(v8i16 (and VR128X:$src1, VR128X:$src2)),
5029 (VPANDQZ128rr VR128X:$src1, VR128X:$src2)>;
5031 def : Pat<(v16i8 (or VR128X:$src1, VR128X:$src2)),
5032 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5033 def : Pat<(v8i16 (or VR128X:$src1, VR128X:$src2)),
5034 (VPORQZ128rr VR128X:$src1, VR128X:$src2)>;
5036 def : Pat<(v16i8 (xor VR128X:$src1, VR128X:$src2)),
5037 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5038 def : Pat<(v8i16 (xor VR128X:$src1, VR128X:$src2)),
5039 (VPXORQZ128rr VR128X:$src1, VR128X:$src2)>;
5041 def : Pat<(v16i8 (X86andnp VR128X:$src1, VR128X:$src2)),
5042 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
5043 def : Pat<(v8i16 (X86andnp VR128X:$src1, VR128X:$src2)),
5044 (VPANDNQZ128rr VR128X:$src1, VR128X:$src2)>;
// Memory-operand (rm) variants of the 128-bit patterns above.
5046 def : Pat<(and VR128X:$src1, (loadv16i8 addr:$src2)),
5047 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5048 def : Pat<(and VR128X:$src1, (loadv8i16 addr:$src2)),
5049 (VPANDQZ128rm VR128X:$src1, addr:$src2)>;
5051 def : Pat<(or VR128X:$src1, (loadv16i8 addr:$src2)),
5052 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5053 def : Pat<(or VR128X:$src1, (loadv8i16 addr:$src2)),
5054 (VPORQZ128rm VR128X:$src1, addr:$src2)>;
5056 def : Pat<(xor VR128X:$src1, (loadv16i8 addr:$src2)),
5057 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5058 def : Pat<(xor VR128X:$src1, (loadv8i16 addr:$src2)),
5059 (VPXORQZ128rm VR128X:$src1, addr:$src2)>;
5061 def : Pat<(X86andnp VR128X:$src1, (loadv16i8 addr:$src2)),
5062 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
5063 def : Pat<(X86andnp VR128X:$src1, (loadv8i16 addr:$src2)),
5064 (VPANDNQZ128rm VR128X:$src1, addr:$src2)>;
// 256-bit register-register patterns.
5066 def : Pat<(v32i8 (and VR256X:$src1, VR256X:$src2)),
5067 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5068 def : Pat<(v16i16 (and VR256X:$src1, VR256X:$src2)),
5069 (VPANDQZ256rr VR256X:$src1, VR256X:$src2)>;
5071 def : Pat<(v32i8 (or VR256X:$src1, VR256X:$src2)),
5072 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5073 def : Pat<(v16i16 (or VR256X:$src1, VR256X:$src2)),
5074 (VPORQZ256rr VR256X:$src1, VR256X:$src2)>;
5076 def : Pat<(v32i8 (xor VR256X:$src1, VR256X:$src2)),
5077 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5078 def : Pat<(v16i16 (xor VR256X:$src1, VR256X:$src2)),
5079 (VPXORQZ256rr VR256X:$src1, VR256X:$src2)>;
5081 def : Pat<(v32i8 (X86andnp VR256X:$src1, VR256X:$src2)),
5082 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
5083 def : Pat<(v16i16 (X86andnp VR256X:$src1, VR256X:$src2)),
5084 (VPANDNQZ256rr VR256X:$src1, VR256X:$src2)>;
// 256-bit memory-operand patterns.
5086 def : Pat<(and VR256X:$src1, (loadv32i8 addr:$src2)),
5087 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5088 def : Pat<(and VR256X:$src1, (loadv16i16 addr:$src2)),
5089 (VPANDQZ256rm VR256X:$src1, addr:$src2)>;
5091 def : Pat<(or VR256X:$src1, (loadv32i8 addr:$src2)),
5092 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5093 def : Pat<(or VR256X:$src1, (loadv16i16 addr:$src2)),
5094 (VPORQZ256rm VR256X:$src1, addr:$src2)>;
5096 def : Pat<(xor VR256X:$src1, (loadv32i8 addr:$src2)),
5097 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5098 def : Pat<(xor VR256X:$src1, (loadv16i16 addr:$src2)),
5099 (VPXORQZ256rm VR256X:$src1, addr:$src2)>;
5101 def : Pat<(X86andnp VR256X:$src1, (loadv32i8 addr:$src2)),
5102 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
5103 def : Pat<(X86andnp VR256X:$src1, (loadv16i16 addr:$src2)),
5104 (VPANDNQZ256rm VR256X:$src1, addr:$src2)>;
// 512-bit counterpart of the block above: select byte/word-typed logical ops
// (v64i8/v32i16) onto the qword-element ZMM instructions.
5107 let Predicates = [HasAVX512] in {
5108 def : Pat<(v64i8 (and VR512:$src1, VR512:$src2)),
5109 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5110 def : Pat<(v32i16 (and VR512:$src1, VR512:$src2)),
5111 (VPANDQZrr VR512:$src1, VR512:$src2)>;
5113 def : Pat<(v64i8 (or VR512:$src1, VR512:$src2)),
5114 (VPORQZrr VR512:$src1, VR512:$src2)>;
5115 def : Pat<(v32i16 (or VR512:$src1, VR512:$src2)),
5116 (VPORQZrr VR512:$src1, VR512:$src2)>;
5118 def : Pat<(v64i8 (xor VR512:$src1, VR512:$src2)),
5119 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5120 def : Pat<(v32i16 (xor VR512:$src1, VR512:$src2)),
5121 (VPXORQZrr VR512:$src1, VR512:$src2)>;
5123 def : Pat<(v64i8 (X86andnp VR512:$src1, VR512:$src2)),
5124 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
5125 def : Pat<(v32i16 (X86andnp VR512:$src1, VR512:$src2)),
5126 (VPANDNQZrr VR512:$src1, VR512:$src2)>;
// Memory-operand variants.
5128 def : Pat<(and VR512:$src1, (loadv64i8 addr:$src2)),
5129 (VPANDQZrm VR512:$src1, addr:$src2)>;
5130 def : Pat<(and VR512:$src1, (loadv32i16 addr:$src2)),
5131 (VPANDQZrm VR512:$src1, addr:$src2)>;
5133 def : Pat<(or VR512:$src1, (loadv64i8 addr:$src2)),
5134 (VPORQZrm VR512:$src1, addr:$src2)>;
5135 def : Pat<(or VR512:$src1, (loadv32i16 addr:$src2)),
5136 (VPORQZrm VR512:$src1, addr:$src2)>;
5138 def : Pat<(xor VR512:$src1, (loadv64i8 addr:$src2)),
5139 (VPXORQZrm VR512:$src1, addr:$src2)>;
5140 def : Pat<(xor VR512:$src1, (loadv32i16 addr:$src2)),
5141 (VPXORQZrm VR512:$src1, addr:$src2)>;
5143 def : Pat<(X86andnp VR512:$src1, (loadv64i8 addr:$src2)),
5144 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5145 def : Pat<(X86andnp VR512:$src1, (loadv32i16 addr:$src2)),
5146 (VPANDNQZrm VR512:$src1, addr:$src2)>;
5149 // Patterns to catch vselect with different type than logic op.
// '_' describes the type of the vselect/mask; IntInfo describes the type of
// the underlying logical op, bridged with bitconvert. Covers merge-masked
// (rrk/rmk) and zero-masked (rrkz/rmkz) forms.
5150 multiclass avx512_logical_lowering<string InstrStr, SDNode OpNode,
5152 X86VectorVTInfo IntInfo> {
5153 // Masked register-register logical operations.
5154 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5155 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5157 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5158 _.RC:$src1, _.RC:$src2)>;
5160 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5161 (bitconvert (IntInfo.VT (OpNode _.RC:$src1, _.RC:$src2))),
5163 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5166 // Masked register-memory logical operations.
5167 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5168 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5169 (load addr:$src2)))),
5171 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5172 _.RC:$src1, addr:$src2)>;
5173 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5174 (bitconvert (IntInfo.VT (OpNode _.RC:$src1,
5175 (load addr:$src2)))),
5177 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
// Broadcast-operand counterpart of avx512_logical_lowering: masked logical
// op whose second source is a broadcast load (rmbk / rmbkz forms).
5181 multiclass avx512_logical_lowering_bcast<string InstrStr, SDNode OpNode,
5183 X86VectorVTInfo IntInfo> {
5184 // Register-broadcast logical operations.
5185 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5187 (IntInfo.VT (OpNode _.RC:$src1,
5188 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5190 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5191 _.RC:$src1, addr:$src2)>;
5192 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
5194 (IntInfo.VT (OpNode _.RC:$src1,
5195 (IntInfo.VT (IntInfo.BroadcastLdFrag addr:$src2))))),
5197 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5198 _.RC:$src1, addr:$src2)>;
// Length-expands avx512_logical_lowering: Z128/Z256 under HasVLX, Z under
// HasAVX512.
5201 multiclass avx512_logical_lowering_sizes<string InstrStr, SDNode OpNode,
5202 AVX512VLVectorVTInfo SelectInfo,
5203 AVX512VLVectorVTInfo IntInfo> {
5204 let Predicates = [HasVLX] in {
5205 defm : avx512_logical_lowering<InstrStr#"Z128", OpNode, SelectInfo.info128,
5207 defm : avx512_logical_lowering<InstrStr#"Z256", OpNode, SelectInfo.info256,
5210 let Predicates = [HasAVX512] in {
5211 defm : avx512_logical_lowering<InstrStr#"Z", OpNode, SelectInfo.info512,
// Length-expands the broadcast lowering the same way as
// avx512_logical_lowering_sizes.
5216 multiclass avx512_logical_lowering_sizes_bcast<string InstrStr, SDNode OpNode,
5217 AVX512VLVectorVTInfo SelectInfo,
5218 AVX512VLVectorVTInfo IntInfo> {
5219 let Predicates = [HasVLX] in {
5220 defm : avx512_logical_lowering_bcast<InstrStr#"Z128", OpNode,
5221 SelectInfo.info128, IntInfo.info128>;
5222 defm : avx512_logical_lowering_bcast<InstrStr#"Z256", OpNode,
5223 SelectInfo.info256, IntInfo.info256>;
5225 let Predicates = [HasAVX512] in {
5226 defm : avx512_logical_lowering_bcast<InstrStr#"Z", OpNode,
5227 SelectInfo.info512, IntInfo.info512>;
// Cross-product of select types and logic-op element types: i64/f64 selects
// go to the "Q" instruction, i32/f32 selects to the "D" instruction, for
// every element type the logical op may have been performed in.
5231 multiclass avx512_logical_lowering_types<string InstrStr, SDNode OpNode> {
5232 // i64 vselect with i32/i16/i8 logic op
5233 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5235 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5237 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_i64_info,
5240 // i32 vselect with i64/i16/i8 logic op
5241 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5243 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5245 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_i32_info,
5248 // f32 vselect with i64/i32/i16/i8 logic op
5249 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5251 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5253 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5255 defm : avx512_logical_lowering_sizes<InstrStr#"D", OpNode, avx512vl_f32_info,
5258 // f64 vselect with i64/i32/i16/i8 logic op
5259 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5261 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5263 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
5265 defm : avx512_logical_lowering_sizes<InstrStr#"Q", OpNode, avx512vl_f64_info,
// Broadcast variants for the matching-element-size cases.
5268 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"D", OpNode,
5271 defm : avx512_logical_lowering_sizes_bcast<InstrStr#"Q", OpNode,
// Instantiate the masked-select lowering for all four logical ops.
5276 defm : avx512_logical_lowering_types<"VPAND", and>;
5277 defm : avx512_logical_lowering_types<"VPOR", or>;
5278 defm : avx512_logical_lowering_types<"VPXOR", xor>;
5279 defm : avx512_logical_lowering_types<"VPANDN", X86andnp>;
5281 //===----------------------------------------------------------------------===//
5282 // AVX-512 FP arithmetic
5283 //===----------------------------------------------------------------------===//
5285 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5286 SDNode OpNode, SDNode VecNode,
5287 X86FoldableSchedWrite sched, bit IsCommutable> {
5288 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5289 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5290 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5291 "$src2, $src1", "$src1, $src2",
5292 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5295 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5296 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5297 "$src2, $src1", "$src1, $src2",
5298 (_.VT (VecNode _.RC:$src1,
5299 (_.ScalarIntMemFrags addr:$src2)))>,
5300 Sched<[sched.Folded, sched.ReadAfterFold]>;
5301 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
5302 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5303 (ins _.FRC:$src1, _.FRC:$src2),
5304 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5305 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5307 let isCommutable = IsCommutable;
5309 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5310 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5311 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5312 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5313 (_.ScalarLdFrag addr:$src2)))]>,
5314 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Adds the embedded-rounding form (rrb_Int) with an explicit AVX512RC
// rounding-mode operand; EVEX.b selects static rounding (EVEX_B + EVEX_RC).
5319 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5320 SDNode VecNode, X86FoldableSchedWrite sched,
5321 bit IsCommutable = 0> {
5322 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5323 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5324 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
5325 "$rc, $src2, $src1", "$src1, $src2, $rc",
5326 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
5328 EVEX_B, EVEX_RC, Sched<[sched]>;
// Like avx512_fp_scalar but for ops with a suppress-all-exceptions variant:
// rr_Int/rm_Int/rr/rm plus an rrb_Int form where EVEX.b means {sae}.
// EVEX2VexOvrd names the VEX instruction used by the EVEX->VEX pass for the
// isCodeGenOnly forms.
5330 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
5331 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
5332 X86FoldableSchedWrite sched, bit IsCommutable,
5333 string EVEX2VexOvrd> {
5334 let ExeDomain = _.ExeDomain in {
5335 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5336 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5337 "$src2, $src1", "$src1, $src2",
5338 (_.VT (VecNode _.RC:$src1, _.RC:$src2))>,
5339 Sched<[sched]>, SIMD_EXC;
5341 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5342 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
5343 "$src2, $src1", "$src1, $src2",
5344 (_.VT (VecNode _.RC:$src1,
5345 (_.ScalarIntMemFrags addr:$src2)))>,
5346 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
5348 let isCodeGenOnly = 1, Predicates = [HasAVX512],
5349 Uses = [MXCSR], mayRaiseFPException = 1 in {
5350 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5351 (ins _.FRC:$src1, _.FRC:$src2),
5352 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5353 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5355 EVEX2VEXOverride<EVEX2VexOvrd#"rr"> {
5356 let isCommutable = IsCommutable;
5358 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5359 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5360 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5361 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5362 (_.ScalarLdFrag addr:$src2)))]>,
5363 Sched<[sched.Folded, sched.ReadAfterFold]>,
5364 EVEX2VEXOverride<EVEX2VexOvrd#"rm">;
// {sae} form: no MXCSR-dependent rounding, exceptions suppressed.
5367 let Uses = [MXCSR] in
5368 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5369 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5370 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5371 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5372 EVEX_B, Sched<[sched]>;
// Emits SS (f32, XS prefix) and SD (f64, XD + VEX.W) scalar variants, each
// combining the plain scalar forms with the embedded-rounding form.
5376 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
5377 SDNode VecNode, SDNode RndNode,
5378 X86SchedWriteSizes sched, bit IsCommutable> {
5379 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
5380 sched.PS.Scl, IsCommutable>,
5381 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
5382 sched.PS.Scl, IsCommutable>,
5383 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5384 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
5385 sched.PD.Scl, IsCommutable>,
5386 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
5387 sched.PD.Scl, IsCommutable>,
5388 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// SS/SD scalar variants for ops with an {sae} form (min/max) instead of an
// embedded-rounding form.
5391 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
5392 SDNode VecNode, SDNode SaeNode,
5393 X86SchedWriteSizes sched, bit IsCommutable> {
5394 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
5395 VecNode, SaeNode, sched.PS.Scl, IsCommutable,
5397 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5398 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
5399 VecNode, SaeNode, sched.PD.Scl, IsCommutable,
5401 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP arithmetic: add/mul commutable, sub/div not; min/max use the
// {sae} variant and are non-commutable (IEEE NaN-operand ordering matters).
5403 defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
5404 SchedWriteFAddSizes, 1>;
5405 defm VMUL : avx512_binop_s_round<0x59, "vmul", any_fmul, X86fmuls, X86fmulRnds,
5406 SchedWriteFMulSizes, 1>;
5407 defm VSUB : avx512_binop_s_round<0x5C, "vsub", any_fsub, X86fsubs, X86fsubRnds,
5408 SchedWriteFAddSizes, 0>;
5409 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", any_fdiv, X86fdivs, X86fdivRnds,
5410 SchedWriteFDivSizes, 0>;
5411 defm VMIN : avx512_binop_s_sae<0x5D, "vmin", X86fmin, X86fmins, X86fminSAEs,
5412 SchedWriteFCmpSizes, 0>;
5413 defm VMAX : avx512_binop_s_sae<0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxSAEs,
5414 SchedWriteFCmpSizes, 0>;
5416 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
5417 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// isCodeGenOnly scalar rr/rm forms marked isCommutable = 1; reuses the
// regular min/max encodings and overrides EVEX->VEX conversion with the
// named VEX instruction.
5418 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
5419 X86VectorVTInfo _, SDNode OpNode,
5420 X86FoldableSchedWrite sched,
5421 string EVEX2VEXOvrd> {
5422 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
5423 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
5424 (ins _.FRC:$src1, _.FRC:$src2),
5425 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5426 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>,
5427 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr"> {
5428 let isCommutable = 1;
5430 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
5431 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
5432 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5433 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
5434 (_.ScalarLdFrag addr:$src2)))]>,
5435 Sched<[sched.Folded, sched.ReadAfterFold]>,
5436 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
// Commutable min/max instantiations (X86fminc/X86fmaxc); same opcodes as
// VMIN*/VMAX* (0x5D/0x5F), distinguished only by the selected SDNode.
5439 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
5440 SchedWriteFCmp.Scl, "VMINCSS">, XS,
5441 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5443 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
5444 SchedWriteFCmp.Scl, "VMINCSD">, XD,
5445 VEX_W, EVEX_4V, VEX_LIG,
5446 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
5448 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
5449 SchedWriteFCmp.Scl, "VMAXCSS">, XS,
5450 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, SIMD_EXC;
5452 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
5453 SchedWriteFCmp.Scl, "VMAXCSD">, XD,
5454 VEX_W, EVEX_4V, VEX_LIG,
5455 EVEX_CD8<64, CD8VT1>, SIMD_EXC;
// Packed FP binary op: rr, rm and broadcast (rmb) forms. Uses
// AVX512_maskable_split so the unmasked pattern (OpNode) and the masked
// pattern (MaskOpNode) can differ; commutability of the masked forms is
// controlled separately via IsKCommutable.
5457 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5458 SDPatternOperator MaskOpNode,
5459 X86VectorVTInfo _, X86FoldableSchedWrite sched,
5461 bit IsKCommutable = IsCommutable> {
5462 let ExeDomain = _.ExeDomain, hasSideEffects = 0,
5463 Uses = [MXCSR], mayRaiseFPException = 1 in {
5464 defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
5465 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5466 "$src2, $src1", "$src1, $src2",
5467 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
5468 (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
5469 IsKCommutable, IsKCommutable>,
5470 EVEX_4V, Sched<[sched]>;
5471 let mayLoad = 1 in {
5472 defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5473 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5474 "$src2, $src1", "$src1, $src2",
5475 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
5476 (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5477 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5478 defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
5479 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5480 "${src2}"#_.BroadcastStr#", $src1",
5481 "$src1, ${src2}"#_.BroadcastStr,
5482 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
5483 (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5485 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Packed FP binop with static (embedded) rounding control: a single rrb form
// taking an AVX512RC:$rc operand, encoded via EVEX.b + the EVEX_RC bits.
// Static rounding implicitly suppresses exceptions, so only MXCSR is listed.
5490 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
5491 SDPatternOperator OpNodeRnd,
5492 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5493 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5494 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5495 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
5496 "$rc, $src2, $src1", "$src1, $src2, $rc",
5497 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
5498 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed FP binop with {sae} (suppress-all-exceptions): a single rrb form,
// EVEX.b set without rounding-control bits. Asm strings carry the literal
// "{sae}" token in both AT&T and Intel operand orders.
5501 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr,
5502 SDPatternOperator OpNodeSAE,
5503 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5504 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
5505 defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5506 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5507 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
5508 (_.VT (OpNodeSAE _.RC:$src1, _.RC:$src2))>,
5509 EVEX_4V, EVEX_B, Sched<[sched]>;
// Instantiates avx512_fp_packed across all vector widths: PS/PD at 512-bit
// under `prd`, and the 128/256-bit forms additionally gated on HasVLX.
//   IsPD128Commutable - lets the 128-bit PD form declare different (node-
//                       level) commutability than the other widths; it
//                       defaults to IsCommutable.
5512 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
5513 SDPatternOperator MaskOpNode,
5514 Predicate prd, X86SchedWriteSizes sched,
5515 bit IsCommutable = 0,
5516 bit IsPD128Commutable = IsCommutable> {
5517 let Predicates = [prd] in {
5518 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
5519 sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
5520 EVEX_CD8<32, CD8VF>;
5521 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
5522 sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
5523 EVEX_CD8<64, CD8VF>;
5526 // Define only if AVX512VL feature is present.
5527 let Predicates = [prd, HasVLX] in {
5528 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
5529 sched.PS.XMM, IsCommutable>, EVEX_V128, PS,
5530 EVEX_CD8<32, CD8VF>;
5531 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
5532 sched.PS.YMM, IsCommutable>, EVEX_V256, PS,
5533 EVEX_CD8<32, CD8VF>;
5534 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
5535 sched.PD.XMM, IsPD128Commutable,
5536 IsCommutable>, EVEX_V128, PD, VEX_W,
5537 EVEX_CD8<64, CD8VF>;
5538 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
5539 sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
5540 EVEX_CD8<64, CD8VF>;
// 512-bit-only static-rounding companions (rounding control exists only at
// ZMM width). NOTE(review): embedded numbers 5548/5551 are missing — the
// v16f32_info/v8f64_info argument lines appear dropped from this listing.
5544 let Uses = [MXCSR] in
5545 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5546 X86SchedWriteSizes sched> {
5547 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5549 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5550 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5552 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// 512-bit-only {sae} companions, mirroring avx512_fp_binop_p_round above.
// NOTE(review): embedded numbers 5559/5562 are missing — the VT-info
// argument lines appear dropped from this listing.
5555 let Uses = [MXCSR] in
5556 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
5557 X86SchedWriteSizes sched> {
5558 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
5560 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5561 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
5563 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic. ADD/MUL are commutable (trailing 1); SUB/DIV are
// not. MIN/MAX pair the strict node with {sae} forms instead of rounding
// forms, and pass IsCommutable = 0 (order-sensitive for NaN/equal inputs).
5566 defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
5567 SchedWriteFAddSizes, 1>,
5568 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
5569 defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
5570 SchedWriteFMulSizes, 1>,
5571 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
5572 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
5573 SchedWriteFAddSizes>,
5574 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
5575 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
5576 SchedWriteFDivSizes>,
5577 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
5578 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
5579 SchedWriteFCmpSizes, 0>,
5580 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
5581 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
5582 SchedWriteFCmpSizes, 0>,
5583 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
// Commutable (fast-math) packed min/max, codegen-only like the scalar forms.
5584 let isCodeGenOnly = 1 in {
5585 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
5586 SchedWriteFCmpSizes, 1>;
5587 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
5588 SchedWriteFCmpSizes, 1>;
// FP logic ops: selection is done manually (null_frag patterns), they do not
// touch MXCSR or raise FP exceptions, and require AVX512DQ. ANDN is the only
// non-commutable one.
5590 let Uses = []<Register>, mayRaiseFPException = 0 in {
5591 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, null_frag, HasDQI,
5592 SchedWriteFLogicSizes, 1>;
5593 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, null_frag, HasDQI,
5594 SchedWriteFLogicSizes, 0>;
5595 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, null_frag, HasDQI,
5596 SchedWriteFLogicSizes, 1>;
5597 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, null_frag, HasDQI,
5598 SchedWriteFLogicSizes, 1>;
// Packed VSCALEF (x * 2^floor(y)-style scale op): rr, rm, and broadcast rmb
// forms; reads MXCSR and may raise FP exceptions.
5601 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5602 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5603 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5604 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5605 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5606 "$src2, $src1", "$src1, $src2",
5607 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5608 EVEX_4V, Sched<[sched]>;
5609 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5610 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
5611 "$src2, $src1", "$src1, $src2",
5612 (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
5613 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
5614 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5615 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
5616 "${src2}"#_.BroadcastStr#", $src1",
5617 "$src1, ${src2}"#_.BroadcastStr,
5618 (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
5619 EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar VSCALEFSS/SD: register and scalar-memory forms using the intrinsic
// (whole-XMM) operand conventions. NOTE(review): embedded number 5630 is
// missing — the rr form's Sched/EVEX attribute line appears dropped from
// this listing.
5623 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5624 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5625 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
5626 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5627 (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
5628 "$src2, $src1", "$src1, $src2",
5629 (_.VT (OpNode _.RC:$src1, _.RC:$src2))>,
5631 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5632 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr#_.Suffix,
5633 "$src2, $src1", "$src1, $src2",
5634 (OpNode _.RC:$src1, (_.ScalarIntMemFrags addr:$src2))>,
5635 Sched<[sched.Folded, sched.ReadAfterFold]>;
// All VSCALEF forms: packed 512-bit (with static rounding), scalar SS/SD
// (with rounding, using the separate opcScaler opcode), and the VL-gated
// 128/256-bit packed forms (no rounding below ZMM width).
5639 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
5640 X86SchedWriteWidths sched> {
5641 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
5642 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
5643 EVEX_V512, EVEX_CD8<32, CD8VF>;
5644 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
5645 avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
5646 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5647 defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
5648 avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
5649 X86scalefsRnd, sched.Scl>,
5650 EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
5651 defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
5652 avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
5653 X86scalefsRnd, sched.Scl>,
5654 EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
5656 // Define only if AVX512VL feature is present.
5657 let Predicates = [HasVLX] in {
5658 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
5659 EVEX_V128, EVEX_CD8<32, CD8VF>;
5660 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
5661 EVEX_V256, EVEX_CD8<32, CD8VF>;
5662 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
5663 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5664 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
5665 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// VSCALEF has no VEX equivalent, hence NotEVEX2VEXConvertible.
5668 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
5669 SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
5671 //===----------------------------------------------------------------------===//
5672 // AVX-512 VPTESTM instructions
5673 //===----------------------------------------------------------------------===//
// VPTESTM-style compare producing a mask register (_.KRC) from rr / rm
// forms; rr is declared commutable at the maskable-cmp level (trailing 1).
// NOTE(review): embedded number 5677 is missing — the final template
// parameter (the Name string passed as NAME#... by callers) appears dropped
// from this listing.
5675 multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
5676 X86FoldableSchedWrite sched, X86VectorVTInfo _,
5678 // NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
5679 // There are just too many permutations due to commutability and bitcasts.
5680 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
5681 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5682 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5683 "$src2, $src1", "$src1, $src2",
5684 (null_frag), (null_frag), 1>,
5685 EVEX_4V, Sched<[sched]>;
5687 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5688 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5689 "$src2, $src1", "$src1, $src2",
5690 (null_frag), (null_frag)>,
5691 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5692 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory (rmb) companion for avx512_vptest; only meaningful for
// the 32/64-bit element widths, hence a separate multiclass.
5696 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr,
5697 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5698 let ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
5699 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5700 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5701 "${src2}"#_.BroadcastStr#", $src1",
5702 "$src1, ${src2}"#_.BroadcastStr,
5703 (null_frag), (null_frag)>,
5704 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5705 Sched<[sched.Folded, sched.ReadAfterFold]>;
// D/Q VPTEST forms across widths: 512-bit under HasAVX512, 128/256-bit also
// require HasVLX. Broadcast (mb) forms exist at every width here.
5708 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
5709 X86SchedWriteWidths sched,
5710 AVX512VLVectorVTInfo _> {
5711 let Predicates = [HasAVX512] in
5712 defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
5713 avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
5715 let Predicates = [HasAVX512, HasVLX] in {
5716 defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
5717 avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
5718 defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
5719 avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
// D and Q element-size instantiations; Q gets VEX_W.
// NOTE(review): embedded number 5726 is missing — the avx512vl_i32_info
// argument line for the D form appears dropped from this listing.
5723 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
5724 X86SchedWriteWidths sched> {
5725 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
5727 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
5728 avx512vl_i64_info>, VEX_W;
// W and B element-size VPTEST forms: gated on HasBWI (plus HasVLX for
// sub-512-bit). No broadcast forms — EVEX broadcast only covers 32/64-bit
// elements. The NAME#"W"/NAME#"B" strings feed manual ISel selection.
5731 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5732 X86SchedWriteWidths sched> {
5733 let Predicates = [HasBWI] in {
5734 defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
5735 v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
5736 defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
5737 v64i8_info, NAME#"B">, EVEX_V512;
5739 let Predicates = [HasVLX, HasBWI] in {
5741 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
5742 v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
5743 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
5744 v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
5745 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
5746 v32i8x_info, NAME#"B">, EVEX_V256;
5747 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
5748 v16i8x_info, NAME#"B">, EVEX_V128;
// Convenience wrapper emitting all element sizes: B/W via opc_wb, D/Q via
// opc_dq.
5752 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5753 X86SchedWriteWidths sched> :
5754 avx512_vptest_wb<opc_wb, OpcodeStr, sched>,
5755 avx512_vptest_dq<opc_dq, OpcodeStr, sched>;
// VPTESTM (T8PD) and VPTESTNM (T8XS) share opcode bytes; the mandatory
// prefix distinguishes the two encodings.
5757 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm",
5758 SchedWriteVecLogic>, T8PD;
5759 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm",
5760 SchedWriteVecLogic>, T8XS;
5762 //===----------------------------------------------------------------------===//
5763 // AVX-512 Shift instructions
5764 //===----------------------------------------------------------------------===//
// Shift/rotate by immediate: register (ri) and memory (mi) forms; the
// immediate is an 8-bit timm. NOTE(review): embedded numbers 5774 and 5779
// are missing — the ri form's Sched line and the mi pattern's immediate
// operand line appear dropped from this listing.
5766 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5767 string OpcodeStr, SDNode OpNode,
5768 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5769 let ExeDomain = _.ExeDomain in {
5770 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5771 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5772 "$src2, $src1", "$src1, $src2",
5773 (_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
5775 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5776 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5777 "$src2, $src1", "$src1, $src2",
5778 (_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
5780 Sched<[sched.Folded]>;
// Broadcast-memory shift-by-immediate (mbi) companion: shifts a broadcast
// scalar by an 8-bit immediate. EVEX.b selects the broadcast form.
5784 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5785 string OpcodeStr, SDNode OpNode,
5786 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5787 let ExeDomain = _.ExeDomain in
5788 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5789 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5790 "$src2, ${src1}"#_.BroadcastStr, "${src1}"#_.BroadcastStr#", $src2",
5791 (_.VT (OpNode (_.BroadcastLdFrag addr:$src1), (i8 timm:$src2)))>,
5792 EVEX_B, Sched<[sched.Folded]>;
// Shift by a scalar count held in the low part of an XMM register: $src2 is
// always a 128-bit operand (VR128X / i128mem) regardless of vector width.
// NOTE(review): embedded number 5809 is missing — a trailing attribute line
// of the rm form appears dropped from this listing.
5795 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5796 X86FoldableSchedWrite sched, ValueType SrcVT,
5797 X86VectorVTInfo _> {
5798 // src2 is always 128-bit
5799 let ExeDomain = _.ExeDomain in {
5800 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5801 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5802 "$src2, $src1", "$src1, $src2",
5803 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>,
5804 AVX512BIBase, EVEX_4V, Sched<[sched]>;
5805 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5806 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5807 "$src2, $src1", "$src1, $src2",
5808 (_.VT (OpNode _.RC:$src1, (SrcVT (load addr:$src2))))>,
5810 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width instantiation for scalar-count shifts. Note the CD8 tuple differs
// per width (CD8VQ/CD8VH/CD8VF) because the memory operand stays 128-bit.
// NOTE(review): embedded number 5817 is missing — the trailing Predicate
// parameter line (`prd`, used below) appears dropped from this listing.
5814 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5815 X86SchedWriteWidths sched, ValueType SrcVT,
5816 AVX512VLVectorVTInfo VTInfo,
5818 let Predicates = [prd] in
5819 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT,
5820 VTInfo.info512>, EVEX_V512,
5821 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5822 let Predicates = [prd, HasVLX] in {
5823 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT,
5824 VTInfo.info256>, EVEX_V256,
5825 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5826 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT,
5827 VTInfo.info128>, EVEX_V128,
5828 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// D/Q/W element-size instantiations for scalar-count shifts; W requires
// HasBWI. The Q form can be flagged notEVEX2VEXConvertible (e.g. VPSRAQ has
// no VEX encoding).
5832 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5833 string OpcodeStr, SDNode OpNode,
5834 X86SchedWriteWidths sched,
5835 bit NotEVEX2VEXConvertibleQ = 0> {
5836 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32,
5837 avx512vl_i32_info, HasAVX512>;
5838 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5839 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
5840 avx512vl_i64_info, HasAVX512>, VEX_W;
5841 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
5842 avx512vl_i16_info, HasBWI>;
// Width instantiation for immediate shifts: each width combines the
// reg/mem forms (avx512_shift_rmi) with the broadcast form
// (avx512_shift_rmbi); 128/256-bit additionally require HasVLX.
5845 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5846 string OpcodeStr, SDNode OpNode,
5847 X86SchedWriteWidths sched,
5848 AVX512VLVectorVTInfo VTInfo> {
5849 let Predicates = [HasAVX512] in
5850 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5851 sched.ZMM, VTInfo.info512>,
5852 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.ZMM,
5853 VTInfo.info512>, EVEX_V512;
5854 let Predicates = [HasAVX512, HasVLX] in {
5855 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5856 sched.YMM, VTInfo.info256>,
5857 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.YMM,
5858 VTInfo.info256>, EVEX_V256;
5859 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5860 sched.XMM, VTInfo.info128>,
5861 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched.XMM,
5862 VTInfo.info128>, EVEX_V128;
// Word-element immediate shifts (HasBWI; VLX for sub-512-bit). No broadcast
// form — EVEX broadcast does not cover 16-bit elements. VEX_WIG: the W bit
// is ignored for these encodings.
5866 multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
5867 string OpcodeStr, SDNode OpNode,
5868 X86SchedWriteWidths sched> {
5869 let Predicates = [HasBWI] in
5870 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5871 sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
5872 let Predicates = [HasVLX, HasBWI] in {
5873 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5874 sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
5875 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5876 sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
// D/Q immediate-shift instantiations; Q can opt out of EVEX->VEX
// compression (VPSRAQ-by-immediate has no VEX form).
5880 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5881 Format ImmFormR, Format ImmFormM,
5882 string OpcodeStr, SDNode OpNode,
5883 X86SchedWriteWidths sched,
5884 bit NotEVEX2VEXConvertibleQ = 0> {
5885 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5886 sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5887 let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
5888 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5889 sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Immediate shifts/rotates (shared 0x71/0x72/0x73 opcodes, selected by the
// MRM /reg field: /2 = srl, /6 = sll, /4 = sra, /0 = ror, /1 = rol), and
// scalar-count shifts below. VPSRA's Q form passes 1 for
// NotEVEX2VEXConvertibleQ: vpsraq has no VEX encoding.
5892 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
5893 SchedWriteVecShiftImm>,
5894 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli,
5895 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5897 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli,
5898 SchedWriteVecShiftImm>,
5899 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli,
5900 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5902 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai,
5903 SchedWriteVecShiftImm, 1>,
5904 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai,
5905 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5907 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri,
5908 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5909 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli,
5910 SchedWriteVecShiftImm>, AVX512BIi8Base, EVEX_4V;
5912 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl,
5913 SchedWriteVecShift>;
5914 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra,
5915 SchedWriteVecShift, 1>;
5916 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl,
5917 SchedWriteVecShift>;
5919 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Widen the source into a ZMM register via INSERT_SUBREG, run the 512-bit
// arithmetic shift, then extract the original-width subregister.
// NOTE(review): the embedded numbering skips the lines naming the selected
// instruction in each output pattern (5923/5929/5935/5941 — presumably
// VPSRAQZrr / VPSRAQZri); restore from upstream before use.
5920 let Predicates = [HasAVX512, NoVLX] in {
5921 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5922 (EXTRACT_SUBREG (v8i64
5924 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5925 VR128X:$src2)), sub_ymm)>;
5927 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5928 (EXTRACT_SUBREG (v8i64
5930 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5931 VR128X:$src2)), sub_xmm)>;
5933 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
5934 (EXTRACT_SUBREG (v8i64
5936 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5937 timm:$src2)), sub_ymm)>;
5939 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
5940 (EXTRACT_SUBREG (v8i64
5942 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5943 timm:$src2)), sub_xmm)>;
5946 //===-------------------------------------------------------------------===//
5947 // Variable Bit Shifts
5948 //===-------------------------------------------------------------------===//
// Variable (per-element count) shift: rr and full-vector rm forms; the
// count vector has the same type as the data vector.
5950 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5951 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5952 let ExeDomain = _.ExeDomain in {
5953 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5954 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5955 "$src2, $src1", "$src1, $src2",
5956 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>,
5957 AVX5128IBase, EVEX_4V, Sched<[sched]>;
5958 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5959 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5960 "$src2, $src1", "$src1, $src2",
5961 (_.VT (OpNode _.RC:$src1,
5962 (_.VT (_.LdFrag addr:$src2))))>,
5963 AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5964 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-count (rmb) companion for variable shifts: the per-element
// count is a broadcast scalar from memory (EVEX.b form).
5968 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5969 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
5970 let ExeDomain = _.ExeDomain in
5971 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5972 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5973 "${src2}"#_.BroadcastStr#", $src1",
5974 "$src1, ${src2}"#_.BroadcastStr,
5975 (_.VT (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))>,
5976 AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
5977 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width instantiation for variable shifts: 512-bit under HasAVX512,
// 128/256-bit also gated on HasVLX; every width gets the broadcast form.
5980 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5981 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
5982 let Predicates = [HasAVX512] in
5983 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
5984 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512;
5986 let Predicates = [HasAVX512, HasVLX] in {
5987 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
5988 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256;
5989 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
5990 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128;
// D and Q element-size instantiations; Q gets VEX_W.
// NOTE(review): embedded number 5997 is missing — the avx512vl_i32_info
// argument line for the D form appears dropped from this listing.
5994 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5995 SDNode OpNode, X86SchedWriteWidths sched> {
5996 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
5998 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
5999 avx512vl_i64_info>, VEX_W;
6002 // Use 512bit version to implement 128/256 bit in case NoVLX.
// For each narrow width: widen both operands into a ZMM register, select the
// Z-suffixed 512-bit instruction by name, then extract the subregister.
// NOTE(review): embedded numbers 6008/6012-6013 and 6016/6020-6021 are
// missing — the EXTRACT_SUBREG wrapper and trailing sub_ymm/sub_xmm lines of
// each output pattern appear dropped from this listing.
6003 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
6004 SDNode OpNode, list<Predicate> p> {
6005 let Predicates = p in {
6006 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
6007 (_.info256.VT _.info256.RC:$src2))),
6009 (!cast<Instruction>(OpcodeStr#"Zrr")
6010 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
6011 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
6014 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
6015 (_.info128.VT _.info128.RC:$src2))),
6017 (!cast<Instruction>(OpcodeStr#"Zrr")
6018 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
6019 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element variable shifts (HasBWI; VLX for sub-512-bit); no broadcast
// form for 16-bit elements. NOTE(review): embedded numbers 6027/6029/6031/
// 6033 are missing — the EVEX_V512/V256/V128 (+ VEX_W) attribute lines
// appear dropped from this listing.
6023 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
6024 SDNode OpNode, X86SchedWriteWidths sched> {
6025 let Predicates = [HasBWI] in
6026 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
6028 let Predicates = [HasVLX, HasBWI] in {
6030 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
6032 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
// Variable shifts (D/Q via one opcode, W via a separate BWI opcode) and
// variable rotates (D/Q only). The trailing anonymous defms wire up the
// NoVLX widen-to-512-bit lowering for the forms with no VEX equivalent.
6037 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", X86vshlv, SchedWriteVarVecShift>,
6038 avx512_var_shift_w<0x12, "vpsllvw", X86vshlv, SchedWriteVarVecShift>;
6040 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", X86vsrav, SchedWriteVarVecShift>,
6041 avx512_var_shift_w<0x11, "vpsravw", X86vsrav, SchedWriteVarVecShift>;
6043 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", X86vsrlv, SchedWriteVarVecShift>,
6044 avx512_var_shift_w<0x10, "vpsrlvw", X86vsrlv, SchedWriteVarVecShift>;
6046 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, SchedWriteVarVecShift>;
6047 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, SchedWriteVarVecShift>;
6049 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", X86vsrav, [HasAVX512, NoVLX]>;
6050 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", X86vshlv, [HasBWI, NoVLX]>;
6051 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", X86vsrav, [HasBWI, NoVLX]>;
6052 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", X86vsrlv, [HasBWI, NoVLX]>;
6055 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Same widen/operate/extract idiom as the VPSRA patterns above.
// NOTE(review): the embedded numbering skips the lines naming the selected
// instruction and several EXTRACT_SUBREG tails (e.g. 6059/6062/6065/6068-
// 6069 — presumably VPROLVQZrr / VPROLVDZrr / VPROLQZri / VPROLDZri plus
// sub_xmm/sub_ymm); restore from upstream before use.
6056 let Predicates = [HasAVX512, NoVLX] in {
6057 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6058 (EXTRACT_SUBREG (v8i64
6060 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6061 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6063 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6064 (EXTRACT_SUBREG (v8i64
6066 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6067 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6070 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6071 (EXTRACT_SUBREG (v16i32
6073 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6074 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6076 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6077 (EXTRACT_SUBREG (v16i32
6079 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6080 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6083 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
6084 (EXTRACT_SUBREG (v8i64
6086 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6087 timm:$src2)), sub_xmm)>;
6088 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
6089 (EXTRACT_SUBREG (v8i64
6091 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6092 timm:$src2)), sub_ymm)>;
6094 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
6095 (EXTRACT_SUBREG (v16i32
6097 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6098 timm:$src2)), sub_xmm)>;
6099 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
6100 (EXTRACT_SUBREG (v16i32
6102 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6103 timm:$src2)), sub_ymm)>;
6106 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Mirror of the VPROL block, selecting the right-rotate instructions.
// NOTE(review): as in the VPROL block, the embedded numbering skips the
// instruction-name lines and several EXTRACT_SUBREG tails (presumably
// VPRORVQZrr / VPRORVDZrr / VPRORQZri / VPRORDZri); restore from upstream
// before use.
6107 let Predicates = [HasAVX512, NoVLX] in {
6108 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
6109 (EXTRACT_SUBREG (v8i64
6111 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6112 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6114 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
6115 (EXTRACT_SUBREG (v8i64
6117 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6118 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6121 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
6122 (EXTRACT_SUBREG (v16i32
6124 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6125 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm)))),
6127 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
6128 (EXTRACT_SUBREG (v16i32
6130 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6131 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
6134 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
6135 (EXTRACT_SUBREG (v8i64
6137 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6138 timm:$src2)), sub_xmm)>;
6139 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
6140 (EXTRACT_SUBREG (v8i64
6142 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6143 timm:$src2)), sub_ymm)>;
6145 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
6146 (EXTRACT_SUBREG (v16i32
6148 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
6149 timm:$src2)), sub_xmm)>;
6150 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
6151 (EXTRACT_SUBREG (v16i32
6153 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
6154 timm:$src2)), sub_ymm)>;
6157 //===-------------------------------------------------------------------===//
6158 // 1-src variable permutation VPERMW/D/Q
6159 //===-------------------------------------------------------------------===//
// Single-source variable permutes with D/Q-sized elements. Reuses the
// avx512_var_shift multiclasses (same rr/rm/rmb operand shape). Only 512-
// and 256-bit forms exist: a 128-bit lane permute is a no-op/illegal here.
6161 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6162 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6163 let Predicates = [HasAVX512] in
6164 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6165 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512;
6167 let Predicates = [HasAVX512, HasVLX] in
6168 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6169 avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256;
// Immediate-controlled permutes (VPERMQ/VPERMPD $imm). Reuses the
// immediate-shift multiclasses for the ri/mi/mbi operand shape; 512- and
// 256-bit only.
6172 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
6173 string OpcodeStr, SDNode OpNode,
6174 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> {
6175 let Predicates = [HasAVX512] in
6176 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6177 sched, VTInfo.info512>,
6178 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6179 sched, VTInfo.info512>, EVEX_V512;
6180 let Predicates = [HasAVX512, HasVLX] in
6181 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
6182 sched, VTInfo.info256>,
6183 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
6184 sched, VTInfo.info256>, EVEX_V256;
// Byte/word variable permutes (VPERMW under HasBWI, VPERMB under HasVBMI);
// no broadcast form for 8/16-bit elements. NOTE(review): embedded numbers
// 6192/6195/6197-6200 are missing — the EVEX_V512/V256/V128 attribute lines
// appear dropped from this listing.
6187 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
6188 Predicate prd, SDNode OpNode,
6189 X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> {
6190 let Predicates = [prd] in
6191 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>,
6193 let Predicates = [HasVLX, prd] in {
6194 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>,
6196 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>,
// Variable permutes (X86VPermv) for all element types, then the
// immediate-controlled VPERMQ/VPERMPD (X86VPermi). The variable and
// immediate VPERMQ/VPERMPD share a defm name but use distinct opcodes.
6201 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
6202 WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
6203 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
6204 WriteVarShuffle256, avx512vl_i8_info>;
6206 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
6207 WriteVarShuffle256, avx512vl_i32_info>;
6208 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
6209 WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
6210 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
6211 WriteFVarShuffle256, avx512vl_f32_info>;
6212 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
6213 WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
6215 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
6216 X86VPermi, WriteShuffle256, avx512vl_i64_info>,
6217 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6218 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
6219 X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
6220 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
6222 //===----------------------------------------------------------------------===//
6223 // AVX-512 - VPERMIL
6224 //===----------------------------------------------------------------------===//
// Template for the variable-control VPERMILPS/VPERMILPD forms at one vector
// width. `_` describes the FP data type, `Ctrl` the integer control-vector
// type. Emits three maskable variants: register-register (rr),
// register-memory (rm), and register-broadcast-memory (rmb, EVEX_B).
// NOTE(review): some interior pattern lines and the closing brace are
// elided in this listing.
6226 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
6227                              X86FoldableSchedWrite sched, X86VectorVTInfo _,
6228                              X86VectorVTInfo Ctrl> {
6229   defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
6230                   (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
6231                   "$src2, $src1", "$src1, $src2",
6232                   (_.VT (OpNode _.RC:$src1,
6233                                (Ctrl.VT Ctrl.RC:$src2)))>,
6234                   T8PD, EVEX_4V, Sched<[sched]>;
6235   defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6236                   (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
6237                   "$src2, $src1", "$src1, $src2",
6240                            (Ctrl.VT (Ctrl.LdFrag addr:$src2))))>,
6241                   T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
6242                   Sched<[sched.Folded, sched.ReadAfterFold]>;
6243   defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
6244                    (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
6245                    "${src2}"#_.BroadcastStr#", $src1",
6246                    "$src1, ${src2}"#_.BroadcastStr,
6249                             (Ctrl.VT (Ctrl.BroadcastLdFrag addr:$src2))))>,
6250                    T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
6251                    Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates avx512_permil_vec (X86VPermilpv node) at all three vector
// widths: ZMM under HasAVX512, XMM/YMM additionally under HasVLX.
// NOTE(review): closing braces are elided in this listing.
6254 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
6255                                     X86SchedWriteWidths sched,
6256                                     AVX512VLVectorVTInfo _,
6257                                     AVX512VLVectorVTInfo Ctrl> {
6258   let Predicates = [HasAVX512] in {
6259     defm Z    : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.ZMM,
6260                                   _.info512, Ctrl.info512>, EVEX_V512;
6262   let Predicates = [HasAVX512, HasVLX] in {
6263     defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.XMM,
6264                                   _.info128, Ctrl.info128>, EVEX_V128;
6265     defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched.YMM,
6266                                   _.info256, Ctrl.info256>, EVEX_V256;
// Combines the variable-control (OpcVar) and immediate-control (OpcImm,
// X86VPermilpi node) VPERMIL forms under one NAME, then instantiates
// VPERMILPS (single domain) and VPERMILPD (double domain, VEX_W1X).
// NOTE(review): some interior lines (e.g. the Ctrl argument of the
// VPERMILPS instantiation) are elided in this listing.
6270 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
6271                          AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
6272   defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, SchedWriteFVarShuffle,
6274   defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
6275                                     X86VPermilpi, SchedWriteFShuffle, _>,
6276              EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
6279 let ExeDomain = SSEPackedSingle in
6280 defm VPERMILPS: avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
6282 let ExeDomain = SSEPackedDouble in
6283 defm VPERMILPD: avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
6284                               avx512vl_i64_info>, VEX_W1X;
6286 //===----------------------------------------------------------------------===//
6287 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
6288 //===----------------------------------------------------------------------===//
6290 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
6291 X86PShufd, SchedWriteShuffle, avx512vl_i32_info>,
6292 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
6293 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
6294 X86PShufhw, SchedWriteShuffle>,
6295 EVEX, AVX512XSIi8Base;
6296 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
6297 X86PShuflw, SchedWriteShuffle>,
6298 EVEX, AVX512XDIi8Base;
6300 //===----------------------------------------------------------------------===//
6301 // AVX-512 - VPSHUFB
6302 //===----------------------------------------------------------------------===//
// VPSHUFB at all three widths via the variable-shift template: ZMM under
// HasBWI, YMM/XMM additionally under HasVLX. Instantiated once below with
// the X86pshufb node; VEX_WIG since the W bit is ignored.
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6304 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
6305                                X86SchedWriteWidths sched> {
6306   let Predicates = [HasBWI] in
6307   defm Z:    avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v64i8_info>,
6310   let Predicates = [HasVLX, HasBWI] in {
6311   defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v32i8x_info>,
6313   defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v16i8x_info>,
6318 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
6319                                   SchedWriteVarShuffle>, VEX_WIG;
6321 //===----------------------------------------------------------------------===//
6322 // Move Low to High and High to Low packed FP Instructions
6323 //===----------------------------------------------------------------------===//
// Register-register move-low/high packed-FP instructions (EVEX forms).
// VMOVLHPS merges the low qword of $src2 into the high half of $src1;
// VMOVHLPS does the converse (see the X86Movlhps/X86Movhlps nodes).
// VMOVHLPS is marked commutable but NotMemoryFoldable.
6325 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
6326           (ins VR128X:$src1, VR128X:$src2),
6327           "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6328           [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))]>,
6329           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
6330 let isCommutable = 1 in
6331 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
6332           (ins VR128X:$src1, VR128X:$src2),
6333           "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
6334           [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
6335           Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
6337 //===----------------------------------------------------------------------===//
6338 // VMOVHPS/PD VMOVLPS Instructions
6339 // All patterns were taken from the SSE implementation.
6340 //===----------------------------------------------------------------------===//
// Template for the load forms of VMOVH/LPS/PD: merges a 64-bit memory
// operand into one half of an XMM register. OpNode may be null_frag (no
// ISel pattern, assembler/disassembler only) — used for the PS variants,
// whose DAG nodes are SSE1-only, per the comment below.
// NOTE(review): interior pattern lines and the closing brace are elided in
// this listing.
6342 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
6343                                   SDPatternOperator OpNode,
6344                                   X86VectorVTInfo _> {
6345   let hasSideEffects = 0, mayLoad = 1, ExeDomain = _.ExeDomain in
6346   def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
6347                   (ins _.RC:$src1, f64mem:$src2),
6348                   !strconcat(OpcodeStr,
6349                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6353                                    (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))]>,
6354                   Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>, EVEX_4V;
6357 // No patterns for MOVLPS/MOVHPS as the Movlhps node should only be created in
6358 // SSE1. And MOVLPS pattern is even more complex.
6359 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
6360                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6361 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
6362                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
6363 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
6364                                   v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
6365 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
6366                                   v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Extra folds: a zero-extending 64-bit load feeding Unpckl/Movsd selects
// the memory forms directly.
6368 let Predicates = [HasAVX512] in {
6370   def : Pat<(v2f64 (X86Unpckl VR128X:$src1, (X86vzload64 addr:$src2))),
6371             (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
6374   def : Pat<(v2f64 (X86Movsd VR128X:$src1, (X86vzload64 addr:$src2))),
6375             (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms. The PS variants carry no ISel patterns (mayStore +
// hasSideEffects = 0 only); VMOVHPD stores the high f64 by extracting
// element 0 of an Unpckh self-shuffle, VMOVLPD stores element 0 directly.
6378 let SchedRW = [WriteFStore] in {
6379 let mayStore = 1, hasSideEffects = 0 in
6380 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6381                        (ins f64mem:$dst, VR128X:$src),
6382                        "vmovhps\t{$src, $dst|$dst, $src}",
6383                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6384 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6385                        (ins f64mem:$dst, VR128X:$src),
6386                        "vmovhpd\t{$src, $dst|$dst, $src}",
6387                        [(store (f64 (extractelt
6388                                      (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6389                                      (iPTR 0))), addr:$dst)]>,
6390                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
6391 let mayStore = 1, hasSideEffects = 0 in
6392 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6393                        (ins f64mem:$dst, VR128X:$src),
6394                        "vmovlps\t{$src, $dst|$dst, $src}",
6395                        []>, EVEX, EVEX_CD8<32, CD8VT2>;
6396 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6397                        (ins f64mem:$dst, VR128X:$src),
6398                        "vmovlpd\t{$src, $dst|$dst, $src}",
6399                        [(store (f64 (extractelt (v2f64 VR128X:$src),
6400                                      (iPTR 0))), addr:$dst)]>,
6401                        EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Fold: storing element 0 of a VPERMILPD-by-1 (i.e. the high element) is a
// VMOVHPD store.
6404 let Predicates = [HasAVX512] in {
6406   def : Pat<(store (f64 (extractelt
6407                            (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6408                            (iPTR 0))), addr:$dst),
6409             (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
6411 //===----------------------------------------------------------------------===//
6412 // FMA - Fused Multiply Operations
// Packed FMA, 213 operand order, one vector width: maskable register (r),
// memory (m), and broadcast-memory (mb, EVEX_B) forms. The DAG patterns
// use separate unmasked (OpNode) and masked (MaskOpNode) nodes. All forms
// read MXCSR and may raise FP exceptions; $src1 is tied to $dst.
// NOTE(review): interior lines (incl. the closing braces) are elided in
// this listing.
6415 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6416                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6417                                X86VectorVTInfo _, string Suff> {
6418   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6419       Uses = [MXCSR], mayRaiseFPException = 1 in {
6420   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6421           (ins _.RC:$src2, _.RC:$src3),
6422           OpcodeStr, "$src3, $src2", "$src2, $src3",
6423           (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
6424           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6425           AVX512FMA3Base, Sched<[sched]>;
6427   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6428           (ins _.RC:$src2, _.MemOp:$src3),
6429           OpcodeStr, "$src3, $src2", "$src2, $src3",
6430           (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
6431           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6432           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6434   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6435             (ins _.RC:$src2, _.ScalarMemOp:$src3),
6436             OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6437             !strconcat("$src2, ${src3}", _.BroadcastStr ),
6439                     _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
6440             (MaskOpNode _.RC:$src2,
6441                         _.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
6442             AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Static-rounding (rb, EVEX_B + EVEX_RC) variant of the 213 packed FMA:
// the rounding mode is an extra immediate operand ($rc). Masked and
// unmasked patterns use the same rounding node here.
// NOTE(review): interior lines (incl. the closing brace) are elided in
// this listing.
6446 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6447                                  X86FoldableSchedWrite sched,
6448                                  X86VectorVTInfo _, string Suff> {
6449   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6451   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6452           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6453           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6454           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
6455           (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
6456           AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Width expansion for 213 packed FMA: ZMM gets both the rm forms and the
// static-rounding form; YMM/XMM (under VLX) get only the rm forms, since
// static rounding is ZMM-only. The _f wrapper then instantiates PS and PD
// element types (PD adds VEX_W).
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6459 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6460                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6461                                    X86SchedWriteWidths sched,
6462                                    AVX512VLVectorVTInfo _, string Suff> {
6463   let Predicates = [HasAVX512] in {
6464     defm Z      : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6465                                       sched.ZMM, _.info512, Suff>,
6466                   avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6468                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6470   let Predicates = [HasVLX, HasAVX512] in {
6471     defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6472                                     sched.YMM, _.info256, Suff>,
6473                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6474     defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6475                                     sched.XMM, _.info128, Suff>,
6476                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6480 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6481                               SDNode MaskOpNode, SDNode OpNodeRnd> {
6482   defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6483                                     OpNodeRnd, SchedWriteFMA,
6484                                     avx512vl_f32_info, "PS">;
6485   defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6486                                     OpNodeRnd, SchedWriteFMA,
6487                                     avx512vl_f64_info, "PD">, VEX_W;
// 213-order FMA instantiations. Each passes the strict/any node for
// unmasked codegen, the plain node for masked forms, and the rounding node
// for the ZMM rb variants.
6490 defm VFMADD213    : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd,
6491                                        X86Fmadd, X86FmaddRnd>;
6492 defm VFMSUB213    : avx512_fma3p_213_f<0xAA, "vfmsub213", X86any_Fmsub,
6493                                        X86Fmsub, X86FmsubRnd>;
6494 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub,
6495                                        X86Fmaddsub, X86FmaddsubRnd>;
6496 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd,
6497                                        X86Fmsubadd, X86FmsubaddRnd>;
6498 defm VFNMADD213   : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86any_Fnmadd,
6499                                        X86Fnmadd, X86FnmaddRnd>;
6500 defm VFNMSUB213   : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
6501                                        X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 231 operand order (accumulator is $src1, tied to $dst; the
// multiplicands come from $src2 and $src3/memory). Structure mirrors the
// 213 templates above: r/m/mb maskable forms plus a separate
// static-rounding (rb) multiclass.
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6504 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6505                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6506                                X86VectorVTInfo _, string Suff> {
6507   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6508       Uses = [MXCSR], mayRaiseFPException = 1 in {
6509   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6510           (ins _.RC:$src2, _.RC:$src3),
6511           OpcodeStr, "$src3, $src2", "$src2, $src3",
6513           (_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6514           AVX512FMA3Base, Sched<[sched]>;
6516   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6517           (ins _.RC:$src2, _.MemOp:$src3),
6518           OpcodeStr, "$src3, $src2", "$src2, $src3",
6519           (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
6520           (_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6521           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6523   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6524          (ins _.RC:$src2, _.ScalarMemOp:$src3),
6525          OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6526          "$src2, ${src3}"#_.BroadcastStr,
6527          (_.VT (OpNode _.RC:$src2,
6528                       (_.VT (_.BroadcastLdFrag addr:$src3)),
6530          (_.VT (MaskOpNode _.RC:$src2,
6531                            (_.VT (_.BroadcastLdFrag addr:$src3)),
6532                            _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
6533          Sched<[sched.Folded, sched.ReadAfterFold]>;
// Static-rounding (rb) variant of the 231 packed FMA, ZMM-only by use.
6537 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6538                                  X86FoldableSchedWrite sched,
6539                                  X86VectorVTInfo _, string Suff> {
6540   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6542   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6543           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6544           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6546           (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
6547           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Width/type expansion for 231 packed FMA, parallel to the 213 version:
// ZMM gets rm + rounding forms, YMM/XMM (VLX) get rm only; the _f wrapper
// instantiates PS and PD (PD adds VEX_W).
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6550 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6551                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6552                                    X86SchedWriteWidths sched,
6553                                    AVX512VLVectorVTInfo _, string Suff> {
6554   let Predicates = [HasAVX512] in {
6555     defm Z      : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6556                                       sched.ZMM, _.info512, Suff>,
6557                   avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6559                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6561   let Predicates = [HasVLX, HasAVX512] in {
6562     defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6563                                     sched.YMM, _.info256, Suff>,
6564                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6565     defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6566                                     sched.XMM, _.info128, Suff>,
6567                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6571 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6572                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6573   defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6574                                     OpNodeRnd, SchedWriteFMA,
6575                                     avx512vl_f32_info, "PS">;
6576   defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6577                                     OpNodeRnd, SchedWriteFMA,
6578                                     avx512vl_f64_info, "PD">, VEX_W;
// 231-order FMA instantiations (opcodes 0xB6-0xBE), same node triples as
// the 213 family.
6581 defm VFMADD231    : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd,
6582                                        X86Fmadd, X86FmaddRnd>;
6583 defm VFMSUB231    : avx512_fma3p_231_f<0xBA, "vfmsub231", X86any_Fmsub,
6584                                        X86Fmsub, X86FmsubRnd>;
6585 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub,
6586                                        X86Fmaddsub, X86FmaddsubRnd>;
6587 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd,
6588                                        X86Fmsubadd, X86FmsubaddRnd>;
6589 defm VFNMADD231   : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86any_Fnmadd,
6590                                        X86Fnmadd, X86FnmaddRnd>;
6591 defm VFNMSUB231   : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
6592                                        X86Fnmsub, X86FnmsubRnd>;
// Packed FMA, 132 operand order. The memory-form patterns are written in
// 312 order (load operand first) deliberately — see the in-line comments —
// so tablegen's duplicate-pattern detection does not collide them with the
// 213/231 patterns. Structure otherwise mirrors the 213/231 templates.
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6594 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6595                                SDNode MaskOpNode, X86FoldableSchedWrite sched,
6596                                X86VectorVTInfo _, string Suff> {
6597   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6598       Uses = [MXCSR], mayRaiseFPException = 1 in {
6599   defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6600           (ins _.RC:$src2, _.RC:$src3),
6601           OpcodeStr, "$src3, $src2", "$src2, $src3",
6603           (_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6604           AVX512FMA3Base, Sched<[sched]>;
6606   // Pattern is 312 order so that the load is in a different place from the
6607   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6608   defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6609           (ins _.RC:$src2, _.MemOp:$src3),
6610           OpcodeStr, "$src3, $src2", "$src2, $src3",
6611           (_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
6612           (_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
6613           AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
6615   // Pattern is 312 order so that the load is in a different place from the
6616   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6617   defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
6618           (ins _.RC:$src2, _.ScalarMemOp:$src3),
6619           OpcodeStr, "${src3}"#_.BroadcastStr#", $src2",
6620           "$src2, ${src3}"#_.BroadcastStr,
6621           (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6622                         _.RC:$src1, _.RC:$src2)),
6623           (_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
6624                             _.RC:$src1, _.RC:$src2)), 1, 0>,
6625           AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Static-rounding (rb) variant of the 132 packed FMA.
6629 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6630                                  X86FoldableSchedWrite sched,
6631                                  X86VectorVTInfo _, string Suff> {
6632   let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
6634   defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
6635           (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6636           OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6638           (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
6639           1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
// Width/type expansion for 132 packed FMA, parallel to the 213/231
// versions: ZMM gets rm + rounding, YMM/XMM (VLX) get rm only; the _f
// wrapper instantiates PS and PD (PD adds VEX_W).
// NOTE(review): interior lines (incl. closing braces) are elided in this
// listing.
6642 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6643                                    SDNode MaskOpNode, SDNode OpNodeRnd,
6644                                    X86SchedWriteWidths sched,
6645                                    AVX512VLVectorVTInfo _, string Suff> {
6646   let Predicates = [HasAVX512] in {
6647     defm Z      : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6648                                       sched.ZMM, _.info512, Suff>,
6649                   avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
6651                       EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6653   let Predicates = [HasVLX, HasAVX512] in {
6654     defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6655                                     sched.YMM, _.info256, Suff>,
6656                       EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6657     defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
6658                                     sched.XMM, _.info128, Suff>,
6659                       EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
6663 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6664                               SDNode MaskOpNode, SDNode OpNodeRnd > {
6665   defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
6666                                     OpNodeRnd, SchedWriteFMA,
6667                                     avx512vl_f32_info, "PS">;
6668   defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
6669                                     OpNodeRnd, SchedWriteFMA,
6670                                     avx512vl_f64_info, "PD">, VEX_W;
// 132-order FMA instantiations (opcodes 0x96-0x9E), same node triples as
// the 213/231 families.
6673 defm VFMADD132    : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd,
6674                                        X86Fmadd, X86FmaddRnd>;
6675 defm VFMSUB132    : avx512_fma3p_132_f<0x9A, "vfmsub132", X86any_Fmsub,
6676                                        X86Fmsub, X86FmsubRnd>;
6677 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub,
6678                                        X86Fmaddsub, X86FmaddsubRnd>;
6679 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd,
6680                                        X86Fmsubadd, X86FmsubaddRnd>;
6681 defm VFNMADD132   : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86any_Fnmadd,
6682                                        X86Fnmadd, X86FnmaddRnd>;
6683 defm VFNMSUB132   : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub,
6684                                        X86Fnmsub, X86FnmsubRnd>;
// Scalar FMA template for one opcode/form. Two families of records:
// - *_Int: maskable intrinsic forms (r/m/rb) on the full 128-bit register
//   class, with null_frag patterns — selection happens via the explicit
//   Pat<> records in avx512_scalar_fma_patterns below.
// - isCodeGenOnly FRC forms (r/m/rb): carry the actual scalar patterns,
//   supplied by the caller as RHS_r/RHS_m/RHS_b dags. MaskOnlyReg
//   suppresses the register/rounding patterns (used for the 231/132 forms
//   where only the load pattern is wanted — see avx512_fma3s_all).
// All forms tie $src1 to $dst; rounding (rb) forms read MXCSR.
6687 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6688                                dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> {
6689 let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
6690   defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6691           (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6692           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6693           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6696   defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6697           (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6698           "$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
6699           AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6701   let Uses = [MXCSR] in
6702   defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6703               (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6704               OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
6705               AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
6707   let isCodeGenOnly = 1, isCommutable = 1 in {
6708     def r     : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6709                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6710                      !strconcat(OpcodeStr,
6711                                 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6712                      !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
6713     def m     : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
6714                     (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6715                     !strconcat(OpcodeStr,
6716                                "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6717                     [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
6719     let Uses = [MXCSR] in
6720     def rb    : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
6721                      (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
6722                      !strconcat(OpcodeStr,
6723                                 "\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
6724                      !if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
6725                      Sched<[SchedWriteFMA.Scl]>;
6726   }// isCodeGenOnly = 1
6727 }// Constraints = "$src1 = $dst"
// Instantiates all three scalar FMA operand orders (213/231/132) for one
// element type, supplying the codegen-only FRC patterns. The 213 form
// passes MaskOnlyReg = 0 (keeps register/rounding patterns); 231/132 pass
// 1, so only their load patterns are emitted. The 132 load pattern is
// written in 312 order to dodge tablegen's duplicate-pattern detection
// (see the comment below).
// NOTE(review): some interior pattern lines and closing braces are elided
// in this listing.
6733 let ExeDomain = _.ExeDomain in {
6734   defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix, _,
6735                 // Operands for intrinsic are in 123 order to preserve passthru
6737                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6739                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6740                             (_.ScalarLdFrag addr:$src3)))),
6741                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src1,
6742                          _.FRC:$src3, (i32 timm:$rc)))), 0>;
6744   defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix, _,
6745                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6747                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6748                             (_.ScalarLdFrag addr:$src3), _.FRC:$src1))),
6749                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src2, _.FRC:$src3,
6750                          _.FRC:$src1, (i32 timm:$rc)))), 1>;
6752   // One pattern is 312 order so that the load is in a different place from the
6753   // 213 and 231 patterns this helps tablegen's duplicate pattern detection.
6754   defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix, _,
6755                 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6757                 (set _.FRC:$dst, (_.EltVT (OpNode (_.ScalarLdFrag addr:$src3),
6758                             _.FRC:$src1, _.FRC:$src2))),
6759                 (set _.FRC:$dst, (_.EltVT (OpNodeRnd _.FRC:$src1, _.FRC:$src3,
6760                          _.FRC:$src2, (i32 timm:$rc)))), 1>;
// Top-level scalar FMA wrapper: instantiates the SS (f32, CD8VT1/32-bit)
// and SD (f64, VEX_W) variants of avx512_fma3s_all under HasAVX512, then
// defines the four scalar FMA families below.
// NOTE(review): closing braces are elided in this listing.
6764 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6765                         string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd> {
6766   let Predicates = [HasAVX512] in {
6767     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6768                                  OpNodeRnd, f32x_info, "SS">,
6769                                  EVEX_CD8<32, CD8VT1>, VEX_LIG;
6770     defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6771                                  OpNodeRnd, f64x_info, "SD">,
6772                                  EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
6776 defm VFMADD  : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
6777 defm VFMSUB  : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86any_Fmsub, X86FmsubRnd>;
6778 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86any_Fnmadd, X86FnmaddRnd>;
6779 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86FnmsubRnd>;
6781 multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode MaskedOp,
6782 SDNode RndOp, string Prefix,
6783 string Suffix, SDNode Move,
6784 X86VectorVTInfo _, PatLeaf ZeroFP> {
6785 let Predicates = [HasAVX512] in {
6786 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6788 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6790 (!cast<I>(Prefix#"213"#Suffix#"Zr_Int")
6791 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6792 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6794 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6795 (Op _.FRC:$src2, _.FRC:$src3,
6796 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6797 (!cast<I>(Prefix#"231"#Suffix#"Zr_Int")
6798 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6799 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6801 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6803 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6804 (_.ScalarLdFrag addr:$src3)))))),
6805 (!cast<I>(Prefix#"213"#Suffix#"Zm_Int")
6806 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6809 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6810 (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6811 (_.ScalarLdFrag addr:$src3), _.FRC:$src2))))),
6812 (!cast<I>(Prefix#"132"#Suffix#"Zm_Int")
6813 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6816 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6817 (Op _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6818 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6819 (!cast<I>(Prefix#"231"#Suffix#"Zm_Int")
6820 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6823 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6824 (X86selects_mask VK1WM:$mask,
6825 (MaskedOp _.FRC:$src2,
6826 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6828 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6829 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intk")
6830 VR128X:$src1, VK1WM:$mask,
6831 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6832 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6834 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6835 (X86selects_mask VK1WM:$mask,
6836 (MaskedOp _.FRC:$src2,
6837 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6838 (_.ScalarLdFrag addr:$src3)),
6839 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6840 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intk")
6841 VR128X:$src1, VK1WM:$mask,
6842 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6844 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6845 (X86selects_mask VK1WM:$mask,
6846 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6847 (_.ScalarLdFrag addr:$src3), _.FRC:$src2),
6848 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6849 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intk")
6850 VR128X:$src1, VK1WM:$mask,
6851 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6853 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6854 (X86selects_mask VK1WM:$mask,
6855 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6856 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6857 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6858 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intk")
6859 VR128X:$src1, VK1WM:$mask,
6860 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6861 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6863 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6864 (X86selects_mask VK1WM:$mask,
6865 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6866 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6867 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6868 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intk")
6869 VR128X:$src1, VK1WM:$mask,
6870 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6872 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6873 (X86selects_mask VK1WM:$mask,
6874 (MaskedOp _.FRC:$src2,
6875 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6877 (_.EltVT ZeroFP)))))),
6878 (!cast<I>(Prefix#"213"#Suffix#"Zr_Intkz")
6879 VR128X:$src1, VK1WM:$mask,
6880 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6881 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6883 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6884 (X86selects_mask VK1WM:$mask,
6885 (MaskedOp _.FRC:$src2, _.FRC:$src3,
6886 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6887 (_.EltVT ZeroFP)))))),
6888 (!cast<I>(Prefix#"231"#Suffix#"Zr_Intkz")
6889 VR128X:$src1, VK1WM:$mask,
6890 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6891 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>;
6893 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6894 (X86selects_mask VK1WM:$mask,
6895 (MaskedOp _.FRC:$src2,
6896 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6897 (_.ScalarLdFrag addr:$src3)),
6898 (_.EltVT ZeroFP)))))),
6899 (!cast<I>(Prefix#"213"#Suffix#"Zm_Intkz")
6900 VR128X:$src1, VK1WM:$mask,
6901 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6903 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6904 (X86selects_mask VK1WM:$mask,
6905 (MaskedOp (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6906 _.FRC:$src2, (_.ScalarLdFrag addr:$src3)),
6907 (_.EltVT ZeroFP)))))),
6908 (!cast<I>(Prefix#"132"#Suffix#"Zm_Intkz")
6909 VR128X:$src1, VK1WM:$mask,
6910 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6912 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6913 (X86selects_mask VK1WM:$mask,
6914 (MaskedOp _.FRC:$src2, (_.ScalarLdFrag addr:$src3),
6915 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))),
6916 (_.EltVT ZeroFP)))))),
6917 (!cast<I>(Prefix#"231"#Suffix#"Zm_Intkz")
6918 VR128X:$src1, VK1WM:$mask,
6919 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>;
6921 // Patterns with rounding mode.
6922 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6924 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6925 _.FRC:$src3, (i32 timm:$rc)))))),
6926 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Int")
6927 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6928 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6930 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6931 (RndOp _.FRC:$src2, _.FRC:$src3,
6932 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6933 (i32 timm:$rc)))))),
6934 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Int")
6935 VR128X:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6936 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6938 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6939 (X86selects_mask VK1WM:$mask,
6941 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6942 _.FRC:$src3, (i32 timm:$rc)),
6943 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6944 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intk")
6945 VR128X:$src1, VK1WM:$mask,
6946 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6947 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6949 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6950 (X86selects_mask VK1WM:$mask,
6951 (RndOp _.FRC:$src2, _.FRC:$src3,
6952 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6954 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0)))))))),
6955 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intk")
6956 VR128X:$src1, VK1WM:$mask,
6957 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6958 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6960 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6961 (X86selects_mask VK1WM:$mask,
6963 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6964 _.FRC:$src3, (i32 timm:$rc)),
6965 (_.EltVT ZeroFP)))))),
6966 (!cast<I>(Prefix#"213"#Suffix#"Zrb_Intkz")
6967 VR128X:$src1, VK1WM:$mask,
6968 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6969 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
6971 def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
6972 (X86selects_mask VK1WM:$mask,
6973 (RndOp _.FRC:$src2, _.FRC:$src3,
6974 (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
6976 (_.EltVT ZeroFP)))))),
6977 (!cast<I>(Prefix#"231"#Suffix#"Zrb_Intkz")
6978 VR128X:$src1, VK1WM:$mask,
6979 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)),
6980 (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
// Instantiate the scalar FMA pattern set for every FMA flavor on both
// f32 ("SS"/X86Movss) and f64 ("SD"/X86Movsd) scalars.  Argument order is:
// <unmasked node, masked node, rounding node, mnemonic prefix, suffix,
//  scalar-move node, 128-bit VT info, zero-FP immediate predicate>.
6984 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6985 "SS", X86Movss, v4f32x_info, fp32imm0>;
6986 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6987 "SS", X86Movss, v4f32x_info, fp32imm0>;
6988 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6989 "SS", X86Movss, v4f32x_info, fp32imm0>;
6990 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
6991 "SS", X86Movss, v4f32x_info, fp32imm0>;
6993 defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86Fmadd, X86FmaddRnd, "VFMADD",
6994 "SD", X86Movsd, v2f64x_info, fp64imm0>;
6995 defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB",
6996 "SD", X86Movsd, v2f64x_info, fp64imm0>;
6997 defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD",
6998 "SD", X86Movsd, v2f64x_info, fp64imm0>;
6999 defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB",
7000 "SD", X86Movsd, v2f64x_info, fp64imm0>;
7002 //===----------------------------------------------------------------------===//
7003 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
7004 //===----------------------------------------------------------------------===//
// Register, memory and broadcast forms of one IFMA (VPMADD52) opcode for a
// single vector width.  $src1 is tied to $dst, i.e. it is the accumulator
// operand of (OpNode mul1, mul2, acc).
7005 let Constraints = "$src1 = $dst" in {
7006 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
7007 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
7008 // NOTE: The SDNode have the multiply operands first with the add last.
7009 // This enables commuted load patterns to be autogenerated by tablegen.
7010 let ExeDomain = _.ExeDomain in {
// Register-register form; the trailing "1, 1" marks the node as
// commutable for the maskable helper so operands can be swapped.
7011 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
7012 (ins _.RC:$src2, _.RC:$src3),
7013 OpcodeStr, "$src3, $src2", "$src2, $src3",
7014 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
7015 AVX512FMA3Base, Sched<[sched]>;
// Full-vector load folded into $src3.
7017 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7018 (ins _.RC:$src2, _.MemOp:$src3),
7019 OpcodeStr, "$src3, $src2", "$src2, $src3",
7020 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
7021 AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-from-scalar memory form (EVEX.b).
// NOTE(review): some interior lines of this def are missing from this
// excerpt of the file.
7023 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
7024 (ins _.RC:$src2, _.ScalarMemOp:$src3),
7025 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
7026 !strconcat("$src2, ${src3}", _.BroadcastStr ),
7028 (_.VT (_.BroadcastLdFrag addr:$src3)),
7030 AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
7033 } // Constraints = "$src1 = $dst"
// Instantiate avx512_pmadd52_rm at all three vector widths: ZMM requires
// only IFMA, while the YMM/XMM forms additionally require VLX.
7035 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
7036 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
7037 let Predicates = [HasIFMA] in {
7038 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
7039 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
7041 let Predicates = [HasVLX, HasIFMA] in {
7042 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
7043 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
7044 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
7045 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPMADD52LUQ (0xB4) adds the low 52 bits of the 52x52-bit product;
// VPMADD52HUQ (0xB5) adds the high 52 bits.  Both operate on i64 lanes.
7049 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
7050 SchedWriteVecIMul, avx512vl_i64_info>,
7052 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
7053 SchedWriteVecIMul, avx512vl_i64_info>,
7056 //===----------------------------------------------------------------------===//
7057 // AVX-512 Scalar convert from sign integer to float/double
7058 //===----------------------------------------------------------------------===//
// Scalar int -> FP conversion (vcvtsi2ss/sd family).  Emits:
//  - rr/rm:       FRC-based forms, isCodeGenOnly (used by isel patterns),
//  - rr_Int/rm_Int: intrinsic (VR128X-based) forms with ISel patterns,
//  - an AT&T-syntax InstAlias carrying the explicit size suffix.
// _Uses/_mayRaiseFPException allow callers to drop MXCSR for variants that
// cannot raise FP exceptions (e.g. 32-bit int to f64).
7060 multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSchedWrite sched,
7061 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7062 X86MemOperand x86memop, PatFrag ld_frag, string asm,
7063 string mem, list<Register> _Uses = [MXCSR],
7064 bit _mayRaiseFPException = 1> {
7065 let ExeDomain = DstVT.ExeDomain, Uses = _Uses,
7066 mayRaiseFPException = _mayRaiseFPException in {
7067 let hasSideEffects = 0, isCodeGenOnly = 1 in {
7068 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
7069 (ins DstVT.FRC:$src1, SrcRC:$src),
7070 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
7071 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7073 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
7074 (ins DstVT.FRC:$src1, x86memop:$src),
7075 asm#"{"#mem#"}\t{$src, $src1, $dst|$dst, $src1, $src}", []>,
7076 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7077 } // hasSideEffects = 0
// Intrinsic forms: upper elements of $src1 pass through to $dst.
7078 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7079 (ins DstVT.RC:$src1, SrcRC:$src2),
7080 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
7081 [(set DstVT.RC:$dst,
7082 (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2))]>,
7083 EVEX_4V, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7085 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
7086 (ins DstVT.RC:$src1, x86memop:$src2),
7087 asm#"{"#mem#"}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7088 [(set DstVT.RC:$dst,
7089 (OpNode (DstVT.VT DstVT.RC:$src1),
7090 (ld_frag addr:$src2)))]>,
7091 EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
7093 def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7094 (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
7095 DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
// Static-rounding (EVEX.b + RC) register form of the scalar int -> FP
// conversions, plus its AT&T-suffixed alias.  Reads MXCSR.
7098 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
7099 X86FoldableSchedWrite sched, RegisterClass SrcRC,
7100 X86VectorVTInfo DstVT, string asm,
7102 let ExeDomain = DstVT.ExeDomain, Uses = [MXCSR] in
7103 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
7104 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
7106 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
7107 [(set DstVT.RC:$dst,
7108 (OpNode (DstVT.VT DstVT.RC:$src1),
7111 EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched, ReadDefault, ReadInt2Fpu]>;
7112 def : InstAlias<"v"#asm#mem#"\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}",
7113 (!cast<Instruction>(NAME#"rrb_Int") DstVT.RC:$dst,
7114 DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), 0, "att">;
// Convenience wrapper combining the default-rounding forms (avx512_vcvtsi)
// with the explicit-rounding form (avx512_vcvtsi_round) under one prefix.
7117 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, SDNode OpNodeRnd,
7118 X86FoldableSchedWrite sched,
7119 RegisterClass SrcRC, X86VectorVTInfo DstVT,
7120 X86MemOperand x86memop, PatFrag ld_frag,
7121 string asm, string mem> {
7122 defm NAME : avx512_vcvtsi_round<opc, OpNodeRnd, sched, SrcRC, DstVT, asm, mem>,
7123 avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop,
7124 ld_frag, asm, mem>, VEX_LIG;
// Instantiations of the scalar int -> FP conversions for signed (VCVTSI*)
// and unsigned (VCVTUSI*) 32/64-bit sources, with InstAliases for the
// ambiguous 32-bit memory forms and selection patterns mapping
// (any_{sint,uint}_to_fp ...) onto the FRC-based codegen-only defs.
// The *2SDZ (GR32 source) variants use null_frag and drop MXCSR because
// i32 -> f64 is always exact.
7127 let Predicates = [HasAVX512] in {
7128 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7130 v4f32x_info, i32mem, loadi32, "cvtsi2ss", "l">,
7131 XS, EVEX_CD8<32, CD8VT1>;
7132 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7134 v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
7135 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7136 defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
7137 v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
7138 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7139 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
7141 v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
7142 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less memory aliases default to the 32-bit ("l") forms.
7144 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7145 (VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7146 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7147 (VCVTSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7149 def : Pat<(f32 (any_sint_to_fp (loadi32 addr:$src))),
7150 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7151 def : Pat<(f32 (any_sint_to_fp (loadi64 addr:$src))),
7152 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7153 def : Pat<(f64 (any_sint_to_fp (loadi32 addr:$src))),
7154 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7155 def : Pat<(f64 (any_sint_to_fp (loadi64 addr:$src))),
7156 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7158 def : Pat<(f32 (any_sint_to_fp GR32:$src)),
7159 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7160 def : Pat<(f32 (any_sint_to_fp GR64:$src)),
7161 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7162 def : Pat<(f64 (any_sint_to_fp GR32:$src)),
7163 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7164 def : Pat<(f64 (any_sint_to_fp GR64:$src)),
7165 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned-source variants (AVX-512 only; no VEX equivalents).
7167 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7169 v4f32x_info, i32mem, loadi32,
7170 "cvtusi2ss", "l">, XS, EVEX_CD8<32, CD8VT1>;
7171 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7173 v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
7174 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
7175 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
7176 i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
7177 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
7178 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
7180 v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
7181 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7183 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
7184 (VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7185 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
7186 (VCVTUSI2SDZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
7188 def : Pat<(f32 (any_uint_to_fp (loadi32 addr:$src))),
7189 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7190 def : Pat<(f32 (any_uint_to_fp (loadi64 addr:$src))),
7191 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
7192 def : Pat<(f64 (any_uint_to_fp (loadi32 addr:$src))),
7193 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7194 def : Pat<(f64 (any_uint_to_fp (loadi64 addr:$src))),
7195 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
7197 def : Pat<(f32 (any_uint_to_fp GR32:$src)),
7198 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
7199 def : Pat<(f32 (any_uint_to_fp GR64:$src)),
7200 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
7201 def : Pat<(f64 (any_uint_to_fp GR32:$src)),
7202 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
7203 def : Pat<(f64 (any_uint_to_fp GR64:$src)),
7204 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
7207 //===----------------------------------------------------------------------===//
7208 // AVX-512 Scalar convert from float/double to integer
7209 //===----------------------------------------------------------------------===//
// Scalar FP -> int conversion with current rounding (rr_Int/rm_Int) and
// explicit rounding control (rrb_Int), plus AT&T size-suffixed aliases.
// Only the RC form pins Uses = [MXCSR]; the others use SIMD_EXC.
7211 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
7212 X86VectorVTInfo DstVT, SDNode OpNode,
7214 X86FoldableSchedWrite sched, string asm,
7216 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7217 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
7218 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7219 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
7220 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7221 let Uses = [MXCSR] in
7222 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
7223 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
7224 [(set DstVT.RC:$dst, (OpNodeRnd (SrcVT.VT SrcVT.RC:$src),(i32 timm:$rc)))]>,
7225 EVEX, VEX_LIG, EVEX_B, EVEX_RC,
7227 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
7228 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7229 [(set DstVT.RC:$dst, (OpNode
7230 (SrcVT.ScalarIntMemFrags addr:$src)))]>,
7231 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7232 } // Predicates = [HasAVX512]
// Aliases resolving the ambiguous AT&T mnemonic to the intrinsic forms.
7234 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7235 (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
7236 def : InstAlias<"v" # asm # aliasStr # "\t{$rc, $src, $dst|$dst, $src, $rc}",
7237 (!cast<Instruction>(NAME # "rrb_Int") DstVT.RC:$dst, SrcVT.RC:$src, AVX512RC:$rc), 0, "att">;
7238 def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7239 (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst,
7240 SrcVT.IntScalarMemOp:$src), 0, "att">;
7243 // Convert float/double to signed/unsigned int 32/64
// Opcode 0x2D = cvts[sd]2si (signed), 0x79 = cvts[sd]2usi (unsigned);
// VEX_W selects the 64-bit integer destination.
7244 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
7245 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{l}">,
7246 XS, EVEX_CD8<32, CD8VT1>;
7247 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
7248 X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
7249 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7250 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
7251 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
7252 XS, EVEX_CD8<32, CD8VT1>;
7253 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
7254 X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
7255 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
7256 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
7257 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
7258 XD, EVEX_CD8<64, CD8VT1>;
7259 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
7260 X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
7261 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7262 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
7263 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
7264 XD, EVEX_CD8<64, CD8VT1>;
7265 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
7266 X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
7267 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Codegen-only scalar FP -> int forms taking an FRC source (register or
// scalar load); used below to select lrint/llrint.
7269 multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
7270 X86VectorVTInfo DstVT, SDNode OpNode,
7271 X86FoldableSchedWrite sched,
7273 let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
7274 let isCodeGenOnly = 1 in {
7275 def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
7276 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7277 [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
7278 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7279 def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
7280 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7281 [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
7282 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7284 } // Predicates = [HasAVX512]
// lrint/llrint lower to cvts[sd]2si, which rounds per the current MXCSR
// mode — exactly lrint semantics.  The extra patterns map 64-bit lrint
// (same node, wider result) onto the 64-bit instructions.
7287 defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
7288 lrint, WriteCvtSS2I,
7289 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7290 defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
7291 llrint, WriteCvtSS2I,
7292 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7293 defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
7294 lrint, WriteCvtSD2I,
7295 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7296 defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
7297 llrint, WriteCvtSD2I,
7298 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7300 let Predicates = [HasAVX512] in {
7301 def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
7302 def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
7304 def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
7305 def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
7308 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
7309 // which produce unnecessary vmovs{s,d} instructions
// Each pattern recognizes "blend the converted scalar into the low lane of
// $dst" and selects the *_Int form directly, since that form already
// preserves the upper lanes of $dst — eliding the separate vmovs[sd].
7310 let Predicates = [HasAVX512] in {
7311 def : Pat<(v4f32 (X86Movss
7312 (v4f32 VR128X:$dst),
7313 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
7314 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7316 def : Pat<(v4f32 (X86Movss
7317 (v4f32 VR128X:$dst),
7318 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi64 addr:$src))))))),
7319 (VCVTSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7321 def : Pat<(v4f32 (X86Movss
7322 (v4f32 VR128X:$dst),
7323 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR32:$src)))))),
7324 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7326 def : Pat<(v4f32 (X86Movss
7327 (v4f32 VR128X:$dst),
7328 (v4f32 (scalar_to_vector (f32 (any_sint_to_fp (loadi32 addr:$src))))))),
7329 (VCVTSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7331 def : Pat<(v2f64 (X86Movsd
7332 (v2f64 VR128X:$dst),
7333 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
7334 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7336 def : Pat<(v2f64 (X86Movsd
7337 (v2f64 VR128X:$dst),
7338 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi64 addr:$src))))))),
7339 (VCVTSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7341 def : Pat<(v2f64 (X86Movsd
7342 (v2f64 VR128X:$dst),
7343 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR32:$src)))))),
7344 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7346 def : Pat<(v2f64 (X86Movsd
7347 (v2f64 VR128X:$dst),
7348 (v2f64 (scalar_to_vector (f64 (any_sint_to_fp (loadi32 addr:$src))))))),
7349 (VCVTSI2SDZrm_Int VR128X:$dst, addr:$src)>;
// Same set for the unsigned-source conversions.
7351 def : Pat<(v4f32 (X86Movss
7352 (v4f32 VR128X:$dst),
7353 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR64:$src)))))),
7354 (VCVTUSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
7356 def : Pat<(v4f32 (X86Movss
7357 (v4f32 VR128X:$dst),
7358 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi64 addr:$src))))))),
7359 (VCVTUSI642SSZrm_Int VR128X:$dst, addr:$src)>;
7361 def : Pat<(v4f32 (X86Movss
7362 (v4f32 VR128X:$dst),
7363 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp GR32:$src)))))),
7364 (VCVTUSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
7366 def : Pat<(v4f32 (X86Movss
7367 (v4f32 VR128X:$dst),
7368 (v4f32 (scalar_to_vector (f32 (any_uint_to_fp (loadi32 addr:$src))))))),
7369 (VCVTUSI2SSZrm_Int VR128X:$dst, addr:$src)>;
7371 def : Pat<(v2f64 (X86Movsd
7372 (v2f64 VR128X:$dst),
7373 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR64:$src)))))),
7374 (VCVTUSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
7376 def : Pat<(v2f64 (X86Movsd
7377 (v2f64 VR128X:$dst),
7378 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi64 addr:$src))))))),
7379 (VCVTUSI642SDZrm_Int VR128X:$dst, addr:$src)>;
7381 def : Pat<(v2f64 (X86Movsd
7382 (v2f64 VR128X:$dst),
7383 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp GR32:$src)))))),
7384 (VCVTUSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
7386 def : Pat<(v2f64 (X86Movsd
7387 (v2f64 VR128X:$dst),
7388 (v2f64 (scalar_to_vector (f64 (any_uint_to_fp (loadi32 addr:$src))))))),
7389 (VCVTUSI2SDZrm_Int VR128X:$dst, addr:$src)>;
7390 } // Predicates = [HasAVX512]
7392 // Convert float/double to signed/unsigned int 32/64 with truncation
// Emits both codegen-only FRC forms (OpNode, e.g. any_fp_to_sint) and
// intrinsic forms (OpNodeInt / OpNodeSAE for the {sae} variant), plus
// AT&T size-suffixed aliases.
7393 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
7394 X86VectorVTInfo _DstRC, SDNode OpNode,
7395 SDNode OpNodeInt, SDNode OpNodeSAE,
7396 X86FoldableSchedWrite sched, string aliasStr>{
7397 let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
7398 let isCodeGenOnly = 1 in {
7399 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
7400 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7401 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>,
7402 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
7403 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
7404 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7405 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
7406 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7409 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7410 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7411 [(set _DstRC.RC:$dst, (OpNodeInt (_SrcRC.VT _SrcRC.RC:$src)))]>,
7412 EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
// {sae} form suppresses exceptions; it only reads MXCSR.
7413 let Uses = [MXCSR] in
7414 def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
7415 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
7416 [(set _DstRC.RC:$dst, (OpNodeSAE (_SrcRC.VT _SrcRC.RC:$src)))]>,
7417 EVEX, VEX_LIG, EVEX_B, Sched<[sched]>;
7418 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
7419 (ins _SrcRC.IntScalarMemOp:$src),
7420 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
7421 [(set _DstRC.RC:$dst,
7422 (OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
7423 EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
7426 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7427 (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7428 def : InstAlias<asm # aliasStr # "\t{{sae}, $src, $dst|$dst, $src, {sae}}",
7429 (!cast<Instruction>(NAME # "rrb_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
7430 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
7431 (!cast<Instruction>(NAME # "rm_Int") _DstRC.RC:$dst,
7432 _SrcRC.IntScalarMemOp:$src), 0, "att">;
// Truncating conversions: 0x2C = cvtts[sd]2si, 0x78 = cvtts[sd]2usi;
// VEX_W selects the 64-bit integer destination.
7435 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
7436 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7437 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7438 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
7439 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
7440 "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
7441 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
7442 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7443 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7444 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
7445 any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
7446 "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
7448 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
7449 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7450 "{l}">, XS, EVEX_CD8<32, CD8VT1>;
7451 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
7452 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
7453 "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
7454 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
7455 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7456 "{l}">, XD, EVEX_CD8<64, CD8VT1>;
7457 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
7458 any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
7459 "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
7461 //===----------------------------------------------------------------------===//
7462 // AVX-512 Convert from float to double and back
7463 //===----------------------------------------------------------------------===//
// Scalar FP <-> FP conversion (vcvtsd2ss / vcvtss2sd): maskable intrinsic
// forms (rr_Int/rm_Int) plus codegen-only FRC forms used by isel patterns.
7465 let Uses = [MXCSR], mayRaiseFPException = 1 in
7466 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7467 X86VectorVTInfo _Src, SDNode OpNode,
7468 X86FoldableSchedWrite sched> {
7469 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7470 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7471 "$src2, $src1", "$src1, $src2",
7472 (_.VT (OpNode (_.VT _.RC:$src1),
7473 (_Src.VT _Src.RC:$src2)))>,
7474 EVEX_4V, VEX_LIG, Sched<[sched]>;
7475 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7476 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
7477 "$src2, $src1", "$src1, $src2",
7478 (_.VT (OpNode (_.VT _.RC:$src1),
7479 (_Src.ScalarIntMemFrags addr:$src2)))>,
7481 Sched<[sched.Folded, sched.ReadAfterFold]>;
// FRC forms carry no patterns here; patterns below select them.
7483 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7484 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7485 (ins _.FRC:$src1, _Src.FRC:$src2),
7486 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7487 EVEX_4V, VEX_LIG, Sched<[sched]>;
7489 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7490 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
7491 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
7492 EVEX_4V, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>;
7496 // Scalar Conversion with SAE - suppress all exceptions
// Register-only {sae} variant (EVEX.b with no memory operand).
7497 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7498 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7499 X86FoldableSchedWrite sched> {
7500 let Uses = [MXCSR] in
7501 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7502 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
7503 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7504 (_.VT (OpNodeSAE (_.VT _.RC:$src1),
7505 (_Src.VT _Src.RC:$src2)))>,
7506 EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
7509 // Scalar Conversion with rounding control (RC)
// Register-only variant taking an immediate rounding mode (EVEX.b + RC).
7510 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7511 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7512 X86FoldableSchedWrite sched> {
7513 let Uses = [MXCSR] in
7514 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7515 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
7516 "$rc, $src2, $src1", "$src1, $src2, $rc",
7517 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
7518 (_Src.VT _Src.RC:$src2), (i32 timm:$rc)))>,
7519 EVEX_4V, VEX_LIG, Sched<[sched]>,
// f64 -> f32 narrowing: rounding-control variant applies (result may be
// inexact), hence avx512_cvt_fp_rc_scalar rather than the SAE variant.
7522 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
7523 SDNode OpNode, SDNode OpNodeRnd,
7524 X86FoldableSchedWrite sched,
7525 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7526 let Predicates = [HasAVX512] in {
7527 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7528 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
7529 OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// f32 -> f64 widening is always exact, so only the SAE variant applies —
// there is no rounding-control form.
7533 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
7534 SDNode OpNode, SDNode OpNodeSAE,
7535 X86FoldableSchedWrite sched,
7536 X86VectorVTInfo _src, X86VectorVTInfo _dst> {
7537 let Predicates = [HasAVX512] in {
7538 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
7539 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
7540 EVEX_CD8<32, CD8VT1>, XS;
// Instantiate the scalar fp-round / fp-extend conversions, then add
// selection patterns for plain fpround/fpextend nodes and for the
// "blend into low lane" idiom (eliding a separate vmovs[sd]).
7543 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
7544 X86froundsRnd, WriteCvtSD2SS, f64x_info,
7546 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
7547 X86fpextsSAE, WriteCvtSS2SD, f32x_info,
7550 def : Pat<(f64 (any_fpextend FR32X:$src)),
7551 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
7552 Requires<[HasAVX512]>;
// Folding the f32 load is only profitable when optimizing for size.
7553 def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
7554 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
7555 Requires<[HasAVX512, OptForSize]>;
7557 def : Pat<(f32 (any_fpround FR64X:$src)),
7558 (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
7559 Requires<[HasAVX512]>;
7561 def : Pat<(v4f32 (X86Movss
7562 (v4f32 VR128X:$dst),
7563 (v4f32 (scalar_to_vector
7564 (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
7565 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
7566 Requires<[HasAVX512]>;
7568 def : Pat<(v2f64 (X86Movsd
7569 (v2f64 VR128X:$dst),
7570 (v2f64 (scalar_to_vector
7571 (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
7572 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
7573 Requires<[HasAVX512]>;
7575 //===----------------------------------------------------------------------===//
7576 // AVX-512 Vector convert from signed/unsigned integer to float/double
7577 // and from float/double to signed/unsigned integer
7578 //===----------------------------------------------------------------------===//
// Generic packed conversion: rr, rm and rmb (broadcast) forms.  Separate
// OpNode/MaskOpNode let strict-FP nodes drive the unmasked pattern while a
// non-strict node drives the masked ones; LdDAG/MaskLdDAG are overridable
// so avx512_vcvt_fpextend can substitute an extending load.
// NOTE(review): several interior lines of this multiclass are missing from
// this excerpt of the file.
7580 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7581 X86VectorVTInfo _Src, SDNode OpNode, SDNode MaskOpNode,
7582 X86FoldableSchedWrite sched,
7583 string Broadcast = _.BroadcastStr,
7584 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7585 RegisterClass MaskRC = _.KRCWM,
7586 dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))),
7587 dag MaskLdDAG = (_.VT (MaskOpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
7588 let Uses = [MXCSR], mayRaiseFPException = 1 in {
7589 defm rr : AVX512_maskable_cvt<opc, MRMSrcReg, _, (outs _.RC:$dst),
7591 (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
7592 (ins MaskRC:$mask, _Src.RC:$src),
7593 OpcodeStr, "$src", "$src",
7594 (_.VT (OpNode (_Src.VT _Src.RC:$src))),
7595 (vselect_mask MaskRC:$mask,
7596 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7598 (vselect_mask MaskRC:$mask,
7599 (_.VT (MaskOpNode (_Src.VT _Src.RC:$src))),
7601 EVEX, Sched<[sched]>;
7603 defm rm : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7605 (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
7606 (ins MaskRC:$mask, MemOp:$src),
7607 OpcodeStr#Alias, "$src", "$src",
7609 (vselect_mask MaskRC:$mask, MaskLdDAG, _.RC:$src0),
7610 (vselect_mask MaskRC:$mask, MaskLdDAG, _.ImmAllZerosV)>,
7611 EVEX, Sched<[sched.Folded]>;
7613 defm rmb : AVX512_maskable_cvt<opc, MRMSrcMem, _, (outs _.RC:$dst),
7614 (ins _Src.ScalarMemOp:$src),
7615 (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
7616 (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
7618 "${src}"#Broadcast, "${src}"#Broadcast,
7619 (_.VT (OpNode (_Src.VT
7620 (_Src.BroadcastLdFrag addr:$src))
7622 (vselect_mask MaskRC:$mask,
7626 (_Src.BroadcastLdFrag addr:$src)))),
7628 (vselect_mask MaskRC:$mask,
7632 (_Src.BroadcastLdFrag addr:$src)))),
7634 EVEX, EVEX_B, Sched<[sched.Folded]>;
7637 // Conversion with SAE - suppress all exceptions
// Packed register-only {sae} variant (EVEX.b, no memory form).
7638 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7639 X86VectorVTInfo _Src, SDNode OpNodeSAE,
7640 X86FoldableSchedWrite sched> {
7641 let Uses = [MXCSR] in
7642 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7643 (ins _Src.RC:$src), OpcodeStr,
7644 "{sae}, $src", "$src, {sae}",
7645 (_.VT (OpNodeSAE (_Src.VT _Src.RC:$src)))>,
7646 EVEX, EVEX_B, Sched<[sched]>;
7649 // Conversion with rounding control (RC)
// Packed register-only variant with immediate rounding mode (EVEX.b + RC).
7650 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7651 X86VectorVTInfo _Src, SDNode OpNodeRnd,
7652 X86FoldableSchedWrite sched> {
7653 let Uses = [MXCSR] in
7654 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7655 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
7656 "$rc, $src", "$src, $rc",
7657 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 timm:$rc)))>,
7658 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
7661 // Similar to avx512_vcvt_fp, but uses an extload for the memory form.
// Overrides LdDAG/MaskLdDAG with an "extload<src VT>" PatFrag so the
// memory form matches extending loads directly.
7662 multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7663 X86VectorVTInfo _Src, SDNode OpNode,
7665 X86FoldableSchedWrite sched,
7666 string Broadcast = _.BroadcastStr,
7667 string Alias = "", X86MemOperand MemOp = _Src.MemOp,
7668 RegisterClass MaskRC = _.KRCWM>
7669 : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, MaskOpNode, sched, Broadcast,
7670 Alias, MemOp, MaskRC,
7671 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
7672 (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
7674 // Extend Float to Double
// The ZMM form also gets an SAE variant. The 128-bit form reads only 64 bits
// of memory (two f32 elements), hence f64mem and the "{1to2}" broadcast string.
7675 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
7676 X86SchedWriteWidths sched> {
7677 let Predicates = [HasAVX512] in {
7678 defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
7679 any_fpextend, fpextend, sched.ZMM>,
7680 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
7681 X86vfpextSAE, sched.ZMM>, EVEX_V512;
7683 let Predicates = [HasVLX] in {
7684 defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
7685 X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
7686 "", f64mem>, EVEX_V128;
7687 defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
7688 any_fpextend, fpextend, sched.YMM>, EVEX_V256;
7692 // Truncate Double to Float
7693 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
7694 let Predicates = [HasAVX512] in {
7695 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
7696 X86any_vfpround, X86vfpround, sched.ZMM>,
7697 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
7698 X86vfproundRnd, sched.ZMM>, EVEX_V512;
7700 let Predicates = [HasVLX] in {
7701 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
7702 null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
7703 f128mem, VK2WM>, EVEX_V128;
7704 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
7705 X86any_vfpround, X86vfpround,
7706 sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7709 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7710 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7711 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7712 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7713 VK2WM:$mask, VR128X:$src), 0, "att">;
7714 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|"
7715 "$dst {${mask}} {z}, $src}",
7716 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7717 VK2WM:$mask, VR128X:$src), 0, "att">;
7718 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7719 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7720 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7721 "$dst {${mask}}, ${src}{1to2}}",
7722 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7723 VK2WM:$mask, f64mem:$src), 0, "att">;
7724 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7725 "$dst {${mask}} {z}, ${src}{1to2}}",
7726 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7727 VK2WM:$mask, f64mem:$src), 0, "att">;
7729 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7730 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7731 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7732 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7733 VK4WM:$mask, VR256X:$src), 0, "att">;
7734 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
7735 "$dst {${mask}} {z}, $src}",
7736 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7737 VK4WM:$mask, VR256X:$src), 0, "att">;
7738 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7739 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst, f64mem:$src), 0, "att">;
7740 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7741 "$dst {${mask}}, ${src}{1to4}}",
7742 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7743 VK4WM:$mask, f64mem:$src), 0, "att">;
7744 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7745 "$dst {${mask}} {z}, ${src}{1to4}}",
7746 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7747 VK4WM:$mask, f64mem:$src), 0, "att">;
// Both conversions share opcode 0x5A; they are distinguished by the mandatory
// prefix (PD vs. PS) and the W bit (VEX_W only on the PD->PS direction).
7750 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
7751 VEX_W, PD, EVEX_CD8<64, CD8VF>;
7752 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
7753 PS, EVEX_CD8<32, CD8VH>;
7755 let Predicates = [HasVLX] in {
7756 // Special patterns to allow use of X86vmfpround for masking. Instruction
7757 // patterns have been disabled with null_frag.
// Covers reg, load, and broadcast sources, each with merge-masked (rrk/rmk/
// rmbk) and zero-masked (rrkz/rmkz/rmbkz) variants of VCVTPD2PSZ128.
7758 def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
7759 (VCVTPD2PSZ128rr VR128X:$src)>;
7760 def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
7762 (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
7763 def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
7765 (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
7767 def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
7768 (VCVTPD2PSZ128rm addr:$src)>;
7769 def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
7771 (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7772 def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
7774 (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
7776 def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
7777 (VCVTPD2PSZ128rmb addr:$src)>;
7778 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7779 (v4f32 VR128X:$src0), VK2WM:$mask),
7780 (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
7781 def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
7782 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
7783 (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
7786 // Convert Signed/Unsigned Doubleword to Double
// i32->f64 is exact: the Uses list is cleared (no MXCSR read) and
// mayRaiseFPException is 0 for every instruction in this multiclass.
7787 let Uses = []<Register>, mayRaiseFPException = 0 in
7788 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7789 SDNode MaskOpNode, SDNode OpNode128,
7790 SDNode MaskOpNode128,
7791 X86SchedWriteWidths sched> {
7792 // No rounding in this op
7793 let Predicates = [HasAVX512] in
7794 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode,
7795 MaskOpNode, sched.ZMM>, EVEX_V512;
7797 let Predicates = [HasVLX] in {
// The 128-bit form uses dedicated nodes (OpNode128) and a custom memory
// pattern: it loads 64 bits and bitcasts them to the low two i32 elements.
7798 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7799 OpNode128, MaskOpNode128, sched.XMM, "{1to2}",
7801 (v2f64 (OpNode128 (bc_v4i32
7803 (scalar_to_vector (loadi64 addr:$src)))))),
7804 (v2f64 (MaskOpNode128 (bc_v4i32
7806 (scalar_to_vector (loadi64 addr:$src))))))>,
7808 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
7809 MaskOpNode, sched.YMM>, EVEX_V256;
7813 // Convert Signed/Unsigned Doubleword to Float
// The ZMM form additionally gets an explicit rounding-control ("rrb") variant.
7814 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7815 SDNode MaskOpNode, SDNode OpNodeRnd,
7816 X86SchedWriteWidths sched> {
7817 let Predicates = [HasAVX512] in
7818 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode,
7819 MaskOpNode, sched.ZMM>,
7820 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7821 OpNodeRnd, sched.ZMM>, EVEX_V512;
7823 let Predicates = [HasVLX] in {
7824 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode,
7825 MaskOpNode, sched.XMM>, EVEX_V128;
7826 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode,
7827 MaskOpNode, sched.YMM>, EVEX_V256;
7831 // Convert Float to Signed/Unsigned Doubleword with truncation
// Truncating conversions pair the ZMM form with an SAE variant (not RC,
// since the rounding direction is fixed to truncate).
7832 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7834 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
7835 let Predicates = [HasAVX512] in {
7836 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7837 MaskOpNode, sched.ZMM>,
7838 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7839 OpNodeSAE, sched.ZMM>, EVEX_V512;
7841 let Predicates = [HasVLX] in {
7842 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7843 MaskOpNode, sched.XMM>, EVEX_V128;
7844 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7845 MaskOpNode, sched.YMM>, EVEX_V256;
7849 // Convert Float to Signed/Unsigned Doubleword
// Non-truncating: the ZMM form gets a rounding-control ("rrb") variant.
7850 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7851 SDNode MaskOpNode, SDNode OpNodeRnd,
7852 X86SchedWriteWidths sched> {
7853 let Predicates = [HasAVX512] in {
7854 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode,
7855 MaskOpNode, sched.ZMM>,
7856 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7857 OpNodeRnd, sched.ZMM>, EVEX_V512;
7859 let Predicates = [HasVLX] in {
7860 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode,
7861 MaskOpNode, sched.XMM>, EVEX_V128;
7862 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode,
7863 MaskOpNode, sched.YMM>, EVEX_V256;
7867 // Convert Double to Signed/Unsigned Doubleword with truncation
7868 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7869 SDNode MaskOpNode, SDNode OpNodeSAE,
7870 X86SchedWriteWidths sched> {
7871 let Predicates = [HasAVX512] in {
7872 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7873 MaskOpNode, sched.ZMM>,
7874 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7875 OpNodeSAE, sched.ZMM>, EVEX_V512;
7877 let Predicates = [HasVLX] in {
7878 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7879 // memory forms of these instructions in Asm Parser. They have the same
7880 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7881 // due to the same reason.
7882 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7883 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7885 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7886 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7889 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7890 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
7891 VR128X:$src), 0, "att">;
7892 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7893 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7894 VK2WM:$mask, VR128X:$src), 0, "att">;
7895 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7896 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7897 VK2WM:$mask, VR128X:$src), 0, "att">;
7898 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7899 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7900 f64mem:$src), 0, "att">;
7901 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7902 "$dst {${mask}}, ${src}{1to2}}",
7903 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7904 VK2WM:$mask, f64mem:$src), 0, "att">;
7905 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7906 "$dst {${mask}} {z}, ${src}{1to2}}",
7907 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7908 VK2WM:$mask, f64mem:$src), 0, "att">;
7910 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7911 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
7912 VR256X:$src), 0, "att">;
7913 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7914 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7915 VK4WM:$mask, VR256X:$src), 0, "att">;
7916 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7917 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7918 VK4WM:$mask, VR256X:$src), 0, "att">;
7919 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7920 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7921 f64mem:$src), 0, "att">;
7922 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7923 "$dst {${mask}}, ${src}{1to4}}",
7924 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7925 VK4WM:$mask, f64mem:$src), 0, "att">;
7926 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7927 "$dst {${mask}} {z}, ${src}{1to4}}",
7928 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7929 VK4WM:$mask, f64mem:$src), 0, "att">;
7932 // Convert Double to Signed/Unsigned Doubleword
7933 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7934 SDNode MaskOpNode, SDNode OpNodeRnd,
7935 X86SchedWriteWidths sched> {
7936 let Predicates = [HasAVX512] in {
7937 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode,
7938 MaskOpNode, sched.ZMM>,
7939 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7940 OpNodeRnd, sched.ZMM>, EVEX_V512;
7942 let Predicates = [HasVLX] in {
7943 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
7944 // memory forms of these instructions in Asm Parser. They have the same
7945 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7946 // due to the same reason.
7947 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7948 null_frag, null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
7950 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7951 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256;
7954 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
7955 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0, "att">;
7956 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7957 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
7958 VK2WM:$mask, VR128X:$src), 0, "att">;
7959 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7960 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
7961 VK2WM:$mask, VR128X:$src), 0, "att">;
7962 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
7963 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
7964 f64mem:$src), 0, "att">;
7965 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
7966 "$dst {${mask}}, ${src}{1to2}}",
7967 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
7968 VK2WM:$mask, f64mem:$src), 0, "att">;
7969 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
7970 "$dst {${mask}} {z}, ${src}{1to2}}",
7971 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
7972 VK2WM:$mask, f64mem:$src), 0, "att">;
7974 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
7975 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0, "att">;
7976 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
7977 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
7978 VK4WM:$mask, VR256X:$src), 0, "att">;
7979 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
7980 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
7981 VK4WM:$mask, VR256X:$src), 0, "att">;
7982 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
7983 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
7984 f64mem:$src), 0, "att">;
7985 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
7986 "$dst {${mask}}, ${src}{1to4}}",
7987 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
7988 VK4WM:$mask, f64mem:$src), 0, "att">;
7989 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
7990 "$dst {${mask}} {z}, ${src}{1to4}}",
7991 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
7992 VK4WM:$mask, f64mem:$src), 0, "att">;
7995 // Convert Double to Signed/Unsigned Quadword
// Requires AVX512DQ; VL forms additionally require AVX512VL.
7996 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7997 SDNode MaskOpNode, SDNode OpNodeRnd,
7998 X86SchedWriteWidths sched> {
7999 let Predicates = [HasDQI] in {
8000 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8001 MaskOpNode, sched.ZMM>,
8002 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
8003 OpNodeRnd, sched.ZMM>, EVEX_V512;
8005 let Predicates = [HasDQI, HasVLX] in {
8006 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8007 MaskOpNode, sched.XMM>, EVEX_V128;
8008 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8009 MaskOpNode, sched.YMM>, EVEX_V256;
8013 // Convert Double to Signed/Unsigned Quadword with truncation
// Requires AVX512DQ; VL forms additionally require AVX512VL. Truncating, so
// the ZMM form pairs with an SAE (not RC) variant.
8014 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8015 SDNode MaskOpNode, SDNode OpNodeRnd,
8016 X86SchedWriteWidths sched> {
8017 let Predicates = [HasDQI] in {
8018 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode,
8019 MaskOpNode, sched.ZMM>,
8020 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
8021 OpNodeRnd, sched.ZMM>, EVEX_V512;
8023 let Predicates = [HasDQI, HasVLX] in {
8024 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode,
8025 MaskOpNode, sched.XMM>, EVEX_V128;
8026 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode,
8027 MaskOpNode, sched.YMM>, EVEX_V256;
8031 // Convert Signed/Unsigned Quadword to Double
// Requires AVX512DQ; the VL forms are marked NotEVEX2VEXConvertible because
// there is no VEX equivalent to compress them to.
8032 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8033 SDNode MaskOpNode, SDNode OpNodeRnd,
8034 X86SchedWriteWidths sched> {
8035 let Predicates = [HasDQI] in {
8036 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode,
8037 MaskOpNode, sched.ZMM>,
8038 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
8039 OpNodeRnd, sched.ZMM>, EVEX_V512;
8041 let Predicates = [HasDQI, HasVLX] in {
8042 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode,
8043 MaskOpNode, sched.XMM>, EVEX_V128, NotEVEX2VEXConvertible;
8044 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode,
8045 MaskOpNode, sched.YMM>, EVEX_V256, NotEVEX2VEXConvertible;
8049 // Convert Float to Signed/Unsigned Quadword
// Requires AVX512DQ; VL forms additionally require AVX512VL.
8050 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8051 SDNode MaskOpNode, SDNode OpNodeRnd,
8052 X86SchedWriteWidths sched> {
8053 let Predicates = [HasDQI] in {
8054 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8055 MaskOpNode, sched.ZMM>,
8056 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
8057 OpNodeRnd, sched.ZMM>, EVEX_V512;
8059 let Predicates = [HasDQI, HasVLX] in {
8060 // Explicitly specified broadcast string, since we take only 2 elements
8061 // from v4f32x_info source
// The 128-bit memory form loads 64 bits (two f32 elements) and bitcasts
// them into the low half of a v4f32, hence f64mem and "{1to2}".
8062 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8063 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8064 (v2i64 (OpNode (bc_v4f32
8066 (scalar_to_vector (loadf64 addr:$src)))))),
8067 (v2i64 (MaskOpNode (bc_v4f32
8069 (scalar_to_vector (loadf64 addr:$src))))))>,
8071 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8072 MaskOpNode, sched.YMM>, EVEX_V256;
8076 // Convert Float to Signed/Unsigned Quadword with truncation
// Requires AVX512DQ; VL forms additionally require AVX512VL. Truncating, so
// the ZMM form pairs with an SAE (not RC) variant.
8077 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
8078 SDNode MaskOpNode, SDNode OpNodeRnd,
8079 X86SchedWriteWidths sched> {
8080 let Predicates = [HasDQI] in {
8081 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode,
8082 MaskOpNode, sched.ZMM>,
8083 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
8084 OpNodeRnd, sched.ZMM>, EVEX_V512;
8086 let Predicates = [HasDQI, HasVLX] in {
8087 // Explicitly specified broadcast string, since we take only 2 elements
8088 // from v4f32x_info source
// As in avx512_cvtps2qq: the 128-bit memory form loads 64 bits (two f32
// elements), hence f64mem and "{1to2}".
8089 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
8090 MaskOpNode, sched.XMM, "{1to2}", "", f64mem, VK2WM,
8091 (v2i64 (OpNode (bc_v4f32
8093 (scalar_to_vector (loadf64 addr:$src)))))),
8094 (v2i64 (MaskOpNode (bc_v4f32
8096 (scalar_to_vector (loadf64 addr:$src))))))>,
8098 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode,
8099 MaskOpNode, sched.YMM>, EVEX_V256;
8103 // Convert Signed/Unsigned Quadword to Float
8104 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
8105 SDNode MaskOpNode, SDNode OpNodeRnd,
8106 X86SchedWriteWidths sched> {
8107 let Predicates = [HasDQI] in {
8108 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
8109 MaskOpNode, sched.ZMM>,
8110 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
8111 OpNodeRnd, sched.ZMM>, EVEX_V512;
8113 let Predicates = [HasDQI, HasVLX] in {
8114 // we need "x"/"y" suffixes in order to distinguish between 128 and 256
8115 // memory forms of these instructions in Asm Parser. They have the same
8116 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
8117 // due to the same reason.
8118 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
8119 null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
8120 EVEX_V128, NotEVEX2VEXConvertible;
8121 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
8122 MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
8123 NotEVEX2VEXConvertible;
8126 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
8127 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
8128 VR128X:$src), 0, "att">;
8129 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8130 (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
8131 VK2WM:$mask, VR128X:$src), 0, "att">;
8132 def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
8133 (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
8134 VK2WM:$mask, VR128X:$src), 0, "att">;
8135 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
8136 (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
8137 i64mem:$src), 0, "att">;
8138 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
8139 "$dst {${mask}}, ${src}{1to2}}",
8140 (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
8141 VK2WM:$mask, i64mem:$src), 0, "att">;
8142 def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
8143 "$dst {${mask}} {z}, ${src}{1to2}}",
8144 (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
8145 VK2WM:$mask, i64mem:$src), 0, "att">;
8147 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
8148 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
8149 VR256X:$src), 0, "att">;
8150 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
8151 "$dst {${mask}}, $src}",
8152 (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
8153 VK4WM:$mask, VR256X:$src), 0, "att">;
8154 def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
8155 "$dst {${mask}} {z}, $src}",
8156 (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
8157 VK4WM:$mask, VR256X:$src), 0, "att">;
8158 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
8159 (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
8160 i64mem:$src), 0, "att">;
8161 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
8162 "$dst {${mask}}, ${src}{1to4}}",
8163 (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
8164 VK4WM:$mask, i64mem:$src), 0, "att">;
8165 def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
8166 "$dst {${mask}} {z}, ${src}{1to4}}",
8167 (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
8168 VK4WM:$mask, i64mem:$src), 0, "att">;
8171 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, sint_to_fp,
8172 X86any_VSintToFP, X86VSintToFP,
8173 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8175 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, sint_to_fp,
8176 X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8177 PS, EVEX_CD8<32, CD8VF>;
8179 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
8180 X86cvttp2si, X86cvttp2siSAE,
8181 SchedWriteCvtPS2DQ>, XS, EVEX_CD8<32, CD8VF>;
8183 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
8184 X86cvttp2si, X86cvttp2siSAE,
8185 SchedWriteCvtPD2DQ>,
8186 PD, VEX_W, EVEX_CD8<64, CD8VF>;
8188 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
8189 X86cvttp2ui, X86cvttp2uiSAE,
8190 SchedWriteCvtPS2DQ>, PS, EVEX_CD8<32, CD8VF>;
8192 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
8193 X86cvttp2ui, X86cvttp2uiSAE,
8194 SchedWriteCvtPD2DQ>,
8195 PS, VEX_W, EVEX_CD8<64, CD8VF>;
8197 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
8198 uint_to_fp, X86any_VUintToFP, X86VUintToFP,
8199 SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>;
8201 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp,
8202 uint_to_fp, X86VUintToFpRnd,
8203 SchedWriteCvtDQ2PS>, XD, EVEX_CD8<32, CD8VF>;
8205 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
8206 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8207 EVEX_CD8<32, CD8VF>;
8209 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
8210 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
8211 VEX_W, EVEX_CD8<64, CD8VF>;
8213 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
8214 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
8215 PS, EVEX_CD8<32, CD8VF>;
8217 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
8218 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8219 PS, EVEX_CD8<64, CD8VF>;
8221 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
8222 X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8223 PD, EVEX_CD8<64, CD8VF>;
8225 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
8226 X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, PD,
8227 EVEX_CD8<32, CD8VH>;
8229 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
8230 X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
8231 PD, EVEX_CD8<64, CD8VF>;
8233 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
8234 X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, PD,
8235 EVEX_CD8<32, CD8VH>;
8237 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
8238 X86cvttp2si, X86cvttp2siSAE,
8239 SchedWriteCvtPD2DQ>, VEX_W,
8240 PD, EVEX_CD8<64, CD8VF>;
8242 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
8243 X86cvttp2si, X86cvttp2siSAE,
8244 SchedWriteCvtPS2DQ>, PD,
8245 EVEX_CD8<32, CD8VH>;
8247 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
8248 X86cvttp2ui, X86cvttp2uiSAE,
8249 SchedWriteCvtPD2DQ>, VEX_W,
8250 PD, EVEX_CD8<64, CD8VF>;
8252 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
8253 X86cvttp2ui, X86cvttp2uiSAE,
8254 SchedWriteCvtPS2DQ>, PD,
8255 EVEX_CD8<32, CD8VH>;
8257 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
8258 sint_to_fp, X86VSintToFpRnd,
8259 SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
8261 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
8262 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
8263 VEX_W, XS, EVEX_CD8<64, CD8VF>;
8265 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
8266 sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
8267 VEX_W, PS, EVEX_CD8<64, CD8VF>;
8269 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
8270 uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
8271 VEX_W, XD, EVEX_CD8<64, CD8VF>;
8273 let Predicates = [HasVLX] in {
8274 // Special patterns to allow use of X86mcvtp2Int for masking. Instruction
8275 // patterns have been disabled with null_frag.
8276 def : Pat<(v4i32 (X86cvtp2Int (v2f64 VR128X:$src))),
8277 (VCVTPD2DQZ128rr VR128X:$src)>;
8278 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8280 (VCVTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8281 def : Pat<(X86mcvtp2Int (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8283 (VCVTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8285 def : Pat<(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))),
8286 (VCVTPD2DQZ128rm addr:$src)>;
8287 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8289 (VCVTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8290 def : Pat<(X86mcvtp2Int (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8292 (VCVTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8294 def : Pat<(v4i32 (X86cvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)))),
8295 (VCVTPD2DQZ128rmb addr:$src)>;
8296 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8297 (v4i32 VR128X:$src0), VK2WM:$mask),
8298 (VCVTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8299 def : Pat<(X86mcvtp2Int (v2f64 (X86VBroadcastld64 addr:$src)),
8300 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8301 (VCVTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8303 // Special patterns to allow use of X86mcvttp2si for masking. Instruction
8304 // patterns have been disabled with null_frag.
8305 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 VR128X:$src))),
8306 (VCVTTPD2DQZ128rr VR128X:$src)>;
8307 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8309 (VCVTTPD2DQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8310 def : Pat<(X86mcvttp2si (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8312 (VCVTTPD2DQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8314 def : Pat<(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))),
8315 (VCVTTPD2DQZ128rm addr:$src)>;
8316 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8318 (VCVTTPD2DQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8319 def : Pat<(X86mcvttp2si (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8321 (VCVTTPD2DQZ128rmkz VK2WM:$mask, addr:$src)>;
8323 def : Pat<(v4i32 (X86any_cvttp2si (v2f64 (X86VBroadcastld64 addr:$src)))),
8324 (VCVTTPD2DQZ128rmb addr:$src)>;
8325 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8326 (v4i32 VR128X:$src0), VK2WM:$mask),
8327 (VCVTTPD2DQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8328 def : Pat<(X86mcvttp2si (v2f64 (X86VBroadcastld64 addr:$src)),
8329 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8330 (VCVTTPD2DQZ128rmbkz VK2WM:$mask, addr:$src)>;
8332 // Special patterns to allow use of X86mcvtp2UInt for masking. Instruction
8333 // patterns have been disabled with null_frag.
8334 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 VR128X:$src))),
8335 (VCVTPD2UDQZ128rr VR128X:$src)>;
8336 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8338 (VCVTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8339 def : Pat<(X86mcvtp2UInt (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8341 (VCVTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8343 def : Pat<(v4i32 (X86cvtp2UInt (loadv2f64 addr:$src))),
8344 (VCVTPD2UDQZ128rm addr:$src)>;
8345 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8347 (VCVTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8348 def : Pat<(X86mcvtp2UInt (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8350 (VCVTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8352 def : Pat<(v4i32 (X86cvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)))),
8353 (VCVTPD2UDQZ128rmb addr:$src)>;
8354 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8355 (v4i32 VR128X:$src0), VK2WM:$mask),
8356 (VCVTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8357 def : Pat<(X86mcvtp2UInt (v2f64 (X86VBroadcastld64 addr:$src)),
8358 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8359 (VCVTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
8361 // Special patterns to allow use of X86mcvttp2ui for masking. Instruction
8362 // patterns have been disabled with null_frag.
8363 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 VR128X:$src))),
8364 (VCVTTPD2UDQZ128rr VR128X:$src)>;
8365 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), (v4i32 VR128X:$src0),
8367 (VCVTTPD2UDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8368 def : Pat<(X86mcvttp2ui (v2f64 VR128X:$src), v4i32x_info.ImmAllZerosV,
8370 (VCVTTPD2UDQZ128rrkz VK2WM:$mask, VR128X:$src)>;
8372 def : Pat<(v4i32 (X86any_cvttp2ui (loadv2f64 addr:$src))),
8373 (VCVTTPD2UDQZ128rm addr:$src)>;
8374 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), (v4i32 VR128X:$src0),
8376 (VCVTTPD2UDQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8377 def : Pat<(X86mcvttp2ui (loadv2f64 addr:$src), v4i32x_info.ImmAllZerosV,
8379 (VCVTTPD2UDQZ128rmkz VK2WM:$mask, addr:$src)>;
8381 def : Pat<(v4i32 (X86any_cvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)))),
8382 (VCVTTPD2UDQZ128rmb addr:$src)>;
8383 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8384 (v4i32 VR128X:$src0), VK2WM:$mask),
8385 (VCVTTPD2UDQZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8386 def : Pat<(X86mcvttp2ui (v2f64 (X86VBroadcastld64 addr:$src)),
8387 v4i32x_info.ImmAllZerosV, VK2WM:$mask),
8388 (VCVTTPD2UDQZ128rmbkz VK2WM:$mask, addr:$src)>;
// Patterns that fold a zero-extending 64-bit scalar load (X86vzload64,
// bitcast to v4f32) into the 128-bit float->i64 conversions
// VCVT(T)PS2(U)QQ, including masked (rmk) and zero-masked (rmkz) forms via
// vselect_mask.  Requires both DQI (the QQ instructions) and VLX (128-bit
// EVEX encodings).
// NOTE(review): embedded line numbers skip (e.g. 8396, 8407) -- the
// pass-through operand lines (VR128X:$src0)) of the rmk patterns appear to
// be missing from this extraction; confirm against upstream.
8391 let Predicates = [HasDQI, HasVLX] in {
8392 def : Pat<(v2i64 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8393 (VCVTPS2QQZ128rm addr:$src)>;
8394 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8395 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8397 (VCVTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8398 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8399 (X86cvtp2Int (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8400 v2i64x_info.ImmAllZerosV)),
8401 (VCVTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8403 def : Pat<(v2i64 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8404 (VCVTPS2UQQZ128rm addr:$src)>;
8405 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8406 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8408 (VCVTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8409 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8410 (X86cvtp2UInt (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8411 v2i64x_info.ImmAllZerosV)),
8412 (VCVTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
8414 def : Pat<(v2i64 (X86any_cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8415 (VCVTTPS2QQZ128rm addr:$src)>;
8416 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8417 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8419 (VCVTTPS2QQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8420 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8421 (X86cvttp2si (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8422 v2i64x_info.ImmAllZerosV)),
8423 (VCVTTPS2QQZ128rmkz VK2WM:$mask, addr:$src)>;
8425 def : Pat<(v2i64 (X86any_cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src))))),
8426 (VCVTTPS2UQQZ128rm addr:$src)>;
8427 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8428 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8430 (VCVTTPS2UQQZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8431 def : Pat<(v2i64 (vselect_mask VK2WM:$mask,
8432 (X86cvttp2ui (bc_v4f32 (v2f64 (X86vzload64 addr:$src)))),
8433 v2i64x_info.ImmAllZerosV)),
8434 (VCVTTPS2UQQZ128rmkz VK2WM:$mask, addr:$src)>;
// Fold a zero-extending 64-bit load (bitcast to v4i32) into the 128-bit
// int->fp64 conversions VCVT(U)DQ2PD, plus masked/zero-masked variants via
// vselect_mask.  Only needs VLX (these instructions exist in base AVX512F).
// NOTE(review): embedded line numbers skip (e.g. 8442, 8453) -- the
// pass-through operand lines of the rmk patterns appear to be missing from
// this extraction.
8437 let Predicates = [HasVLX] in {
8438 def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8439 (VCVTDQ2PDZ128rm addr:$src)>;
8440 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8441 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8443 (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8444 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8445 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8446 v2f64x_info.ImmAllZerosV)),
8447 (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
8449 def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
8450 (VCVTUDQ2PDZ128rm addr:$src)>;
8451 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8452 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8454 (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8455 def : Pat<(v2f64 (vselect_mask VK2WM:$mask,
8456 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))),
8457 v2f64x_info.ImmAllZerosV)),
8458 (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
// 128-bit i64 -> f32 conversions (VCVT(U)QQ2PS).  Masking cannot be
// expressed with vselect because the result is v4f32 while only 2 elements
// are produced, so dedicated explicit-mask nodes X86VMSintToFP /
// X86VMUintToFP are matched here; the instruction patterns themselves were
// disabled with null_frag (see comments at 8462/8491).
// NOTE(review): embedded line numbers skip (e.g. 8467, 8496) -- the
// trailing VK2WM:$mask operand lines of several patterns appear to be
// missing from this extraction.
8461 let Predicates = [HasDQI, HasVLX] in {
8462 // Special patterns to allow use of X86VMSintToFP for masking. Instruction
8463 // patterns have been disabled with null_frag.
8464 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
8465 (VCVTQQ2PSZ128rr VR128X:$src)>;
8466 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8468 (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8469 def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8471 (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8473 def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
8474 (VCVTQQ2PSZ128rm addr:$src)>;
8475 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8477 (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8478 def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8480 (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8482 def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8483 (VCVTQQ2PSZ128rmb addr:$src)>;
8484 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8485 (v4f32 VR128X:$src0), VK2WM:$mask),
8486 (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8487 def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8488 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8489 (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8491 // Special patterns to allow use of X86VMUintToFP for masking. Instruction
8492 // patterns have been disabled with null_frag.
8493 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
8494 (VCVTUQQ2PSZ128rr VR128X:$src)>;
8495 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
8497 (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
8498 def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
8500 (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
8502 def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
8503 (VCVTUQQ2PSZ128rm addr:$src)>;
8504 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
8506 (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8507 def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
8509 (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
8511 def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
8512 (VCVTUQQ2PSZ128rmb addr:$src)>;
8513 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8514 (v4f32 VR128X:$src0), VK2WM:$mask),
8515 (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
8516 def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
8517 v4f32x_info.ImmAllZerosV, VK2WM:$mask),
8518 (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
8521 //===----------------------------------------------------------------------===//
8522 // Half precision conversion instructions
8523 //===----------------------------------------------------------------------===//
// vcvtph2ps (half -> single precision): register (rr) and memory (rm)
// forms.  Uses AVX512_maskable_split so the unmasked pattern can use the
// strict-fp-capable X86any_cvtph2ps while the masked pattern uses
// X86cvtph2ps.  ld_dag lets the caller choose the load fragment (full load
// vs. vzload for the 128-bit variant).  Reads MXCSR and may raise FP
// exceptions.
8525 let Uses = [MXCSR], mayRaiseFPException = 1 in
8526 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8527 X86MemOperand x86memop, dag ld_dag,
8528 X86FoldableSchedWrite sched> {
8529 defm rr : AVX512_maskable_split<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst),
8530 (ins _src.RC:$src), "vcvtph2ps", "$src", "$src",
8531 (X86any_cvtph2ps (_src.VT _src.RC:$src)),
8532 (X86cvtph2ps (_src.VT _src.RC:$src))>,
8533 T8PD, Sched<[sched]>;
8534 defm rm : AVX512_maskable_split<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst),
8535 (ins x86memop:$src), "vcvtph2ps", "$src", "$src",
8536 (X86any_cvtph2ps (_src.VT ld_dag)),
8537 (X86cvtph2ps (_src.VT ld_dag))>,
8538 T8PD, Sched<[sched.Folded]>;
// SAE (suppress-all-exceptions) register form of vcvtph2ps (EVEX.b set),
// matched via the dedicated X86cvtph2psSAE node.  Still lists MXCSR as a
// use for the rounding-control state.
8541 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8542 X86FoldableSchedWrite sched> {
8543 let Uses = [MXCSR] in
8544 defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst),
8545 (ins _src.RC:$src), "vcvtph2ps",
8546 "{sae}, $src", "$src, {sae}",
8547 (X86cvtph2psSAE (_src.VT _src.RC:$src))>,
8548 T8PD, EVEX_B, Sched<[sched]>;
// Instantiations: 512-bit (with SAE form), then VLX 256/128-bit.  The
// 128-bit variant loads via vzload64 (only 4 halves are consumed), and an
// extra pattern matches a scalar i64 load feeding the conversion.
8551 let Predicates = [HasAVX512] in
8552 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem,
8553 (load addr:$src), WriteCvtPH2PSZ>,
8554 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>,
8555 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8557 let Predicates = [HasVLX] in {
8558 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
8559 (load addr:$src), WriteCvtPH2PSY>, EVEX, EVEX_V256,
8560 EVEX_CD8<32, CD8VH>;
8561 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
8562 (bitconvert (v2i64 (X86vzload64 addr:$src))),
8563 WriteCvtPH2PS>, EVEX, EVEX_V128,
8564 EVEX_CD8<32, CD8VH>;
8566 // Pattern match vcvtph2ps of a scalar i64 load.
8567 def : Pat<(v4f32 (X86any_cvtph2ps (v8i16 (bitconvert
8568 (v2i64 (scalar_to_vector (loadi64 addr:$src))))))),
8569 (VCVTPH2PSZ128rm addr:$src)>;
// vcvtps2ph (single -> half precision with imm rounding control):
// unmasked (rr), merge-masked (rrk, $src0 tied to $dst), zero-masked
// (rrkz) register forms matched through X86any_cvtps2ph / X86mcvtps2ph,
// plus pattern-less store forms (mr/mrk) marked mayStore with no side
// effects; store patterns are supplied separately by the instantiation
// sites below.
// NOTE(review): embedded line numbers skip (8580, 8600) -- at least the
// Sched<...> lines after the rr and mr defs appear to be missing from this
// extraction.
8572 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8573 X86MemOperand x86memop, SchedWrite RR, SchedWrite MR> {
8574 let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8575 def rr : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8576 (ins _src.RC:$src1, i32u8imm:$src2),
8577 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
8578 [(set _dest.RC:$dst,
8579 (X86any_cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
8581 let Constraints = "$src0 = $dst" in
8582 def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8583 (ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8584 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
8585 [(set _dest.RC:$dst,
8586 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8587 _dest.RC:$src0, _src.KRCWM:$mask))]>,
8588 Sched<[RR]>, EVEX_K;
8589 def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
8590 (ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8591 "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
8592 [(set _dest.RC:$dst,
8593 (X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
8594 _dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
8595 Sched<[RR]>, EVEX_KZ;
8596 let hasSideEffects = 0, mayStore = 1 in {
8597 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
8598 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
8599 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8601 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
8602 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
8603 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
8604 EVEX_K, Sched<[MR]>, NotMemoryFoldable;
// Assembler-only SAE register form of vcvtps2ph (EVEX.b); no patterns
// (empty DAG list), so it is matched only through assembly parsing.
8609 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src,
8611 let hasSideEffects = 0, Uses = [MXCSR] in
8612 defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
8613 (outs _dest.RC:$dst),
8614 (ins _src.RC:$src1, i32u8imm:$src2),
8615 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>,
8616 EVEX_B, AVX512AIi8Base, Sched<[Sched]>;
// Instantiations of vcvtps2ph (512-bit plus VLX 256/128-bit) and the store
// patterns that select the pattern-less mr forms.  For the 128-bit variant
// only the low 64 bits of the v8i16 result are meaningful, so the store is
// matched as an extract of element 0 of a bitcast to v2f64/v2i64.
8619 let Predicates = [HasAVX512] in {
8620 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem,
8621 WriteCvtPS2PHZ, WriteCvtPS2PHZSt>,
8622 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>,
8623 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
8625 def : Pat<(store (v16i16 (X86any_cvtps2ph VR512:$src1, timm:$src2)), addr:$dst),
8626 (VCVTPS2PHZmr addr:$dst, VR512:$src1, timm:$src2)>;
8629 let Predicates = [HasVLX] in {
8630 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem,
8631 WriteCvtPS2PHY, WriteCvtPS2PHYSt>,
8632 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
8633 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem,
8634 WriteCvtPS2PH, WriteCvtPS2PHSt>,
8635 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
8637 def : Pat<(store (f64 (extractelt
8638 (bc_v2f64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8639 (iPTR 0))), addr:$dst),
8640 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8641 def : Pat<(store (i64 (extractelt
8642 (bc_v2i64 (v8i16 (X86any_cvtps2ph VR128X:$src1, timm:$src2))),
8643 (iPTR 0))), addr:$dst),
8644 (VCVTPS2PHZ128mr addr:$dst, VR128X:$src1, timm:$src2)>;
8645 def : Pat<(store (v8i16 (X86any_cvtps2ph VR256X:$src1, timm:$src2)), addr:$dst),
8646 (VCVTPS2PHZ256mr addr:$dst, VR256X:$src1, timm:$src2)>;
8649 // Unordered/Ordered scalar fp compare with Sae and set EFLAGS
// Assembler-only {sae} form of (v)ucomis*/(v)comis* -- no selection
// pattern (empty DAG list); hasSideEffects = 0 since EFLAGS is declared
// via the Defs at the instantiation site below.
8650 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
8651 string OpcodeStr, Domain d,
8652 X86FoldableSchedWrite sched = WriteFComX> {
8653 let hasSideEffects = 0, Uses = [MXCSR] in
8654 def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
8655 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
8656 EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
// EVEX-encoded scalar compares that set EFLAGS:
//  - SAE-only assembler forms via avx512_ord_cmp_sae (0x2E = ucomis,
//    0x2F = comis),
//  - FR register forms via the shared SSE multiclass sse12_ord_cmp
//    (X86any_fcmp = quiet, X86strict_fcmps = signaling compare),
//  - isCodeGenOnly intrinsic forms on VR128X via sse12_ord_cmp_int.
8659 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8660 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
8661 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8662 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
8663 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8664 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
8665 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
8666 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
8667 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
8670 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
8671 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86any_fcmp, f32, f32mem, loadf32,
8672 "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8673 EVEX_CD8<32, CD8VT1>;
8674 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
8675 "ucomisd", SSEPackedDouble>, PD, EVEX,
8676 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8677 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
8678 "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8679 EVEX_CD8<32, CD8VT1>;
8680 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
8681 "comisd", SSEPackedDouble>, PD, EVEX,
8682 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8683 let isCodeGenOnly = 1 in {
8684 defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
8685 sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8686 EVEX_CD8<32, CD8VT1>;
8687 defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
8688 sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
8689 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8691 defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
8692 sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
8693 EVEX_CD8<32, CD8VT1>;
8694 defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
8695 sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
8696 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
8700 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
// Scalar 14-bit-precision approximation instructions: register/register
// and register/scalar-memory forms, masked via AVX512_maskable_scalar.
// OpNode is X86rcp14s or X86rsqrt14s at the instantiation sites below.
8701 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8702 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8703 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8704 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8705 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8706 "$src2, $src1", "$src1, $src2",
8707 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8708 EVEX_4V, VEX_LIG, Sched<[sched]>;
8709 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8710 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8711 "$src2, $src1", "$src1, $src2",
8712 (OpNode (_.VT _.RC:$src1),
8713 (_.ScalarIntMemFrags addr:$src2))>, EVEX_4V, VEX_LIG,
8714 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Scalar VRCP14 / VRSQRT14 instantiations (f32 and f64 flavors).
// NOTE(review): embedded line numbers skip (8720, 8723) -- the trailing
// format-class lines (likely T8PD;) of the first two defms appear to be
// missing from this extraction.
8718 defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
8719 f32x_info>, EVEX_CD8<32, CD8VT1>,
8721 defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
8722 f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
8724 defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
8725 SchedWriteFRsqrt.Scl, f32x_info>,
8726 EVEX_CD8<32, CD8VT1>, T8PD;
8727 defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
8728 SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
8729 EVEX_CD8<64, CD8VT1>, T8PD;
8731 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
// Packed 14-bit-precision approximations: register (r), full-vector memory
// (m) and broadcast-memory (mb, EVEX_B) forms, all maskable.
8732 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
8733 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
8734 let ExeDomain = _.ExeDomain in {
8735 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8736 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8737 (_.VT (OpNode _.RC:$src))>, EVEX, T8PD,
8739 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8740 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8742 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD,
8743 Sched<[sched.Folded, sched.ReadAfterFold]>;
8744 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8745 (ins _.ScalarMemOp:$src), OpcodeStr,
8746 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8748 (_.BroadcastLdFrag addr:$src)))>,
8749 EVEX, T8PD, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expand avx512_fp14_p across all vector widths: 512-bit always, 128/256
// only under AVX512VL.  Instantiated for VRSQRT14 (0x4E) and VRCP14 (0x4C).
8753 let Uses = [MXCSR] in
8754 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
8755 X86SchedWriteWidths sched> {
8756 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
8757 v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
8758 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
8759 v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
8761 // Define only if AVX512VL feature is present.
8762 let Predicates = [HasVLX] in {
8763 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8764 OpNode, sched.XMM, v4f32x_info>,
8765 EVEX_V128, EVEX_CD8<32, CD8VF>;
8766 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
8767 OpNode, sched.YMM, v8f32x_info>,
8768 EVEX_V256, EVEX_CD8<32, CD8VF>;
8769 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8770 OpNode, sched.XMM, v2f64x_info>,
8771 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
8772 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
8773 OpNode, sched.YMM, v4f64x_info>,
8774 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
8778 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
8779 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
8781 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar 28-bit-precision (ER) approximations: r/rb/m forms.  The rb form
// is the {sae} variant matched via OpNodeSAE; r and m are marked SIMD_EXC
// (may raise FP exceptions) while rb is exception-suppressing.
8782 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
8783 SDNode OpNode, SDNode OpNodeSAE,
8784 X86FoldableSchedWrite sched> {
8785 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
8786 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8787 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8788 "$src2, $src1", "$src1, $src2",
8789 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8790 Sched<[sched]>, SIMD_EXC;
8792 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8793 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8794 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
8795 (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
8796 EVEX_B, Sched<[sched]>;
8798 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8799 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8800 "$src2, $src1", "$src1, $src2",
8801 (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2))>,
8802 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Expand a scalar ER-style op to its SS (f32) and SD (f64) flavors.
// VRCP28/VRSQRT28 require HasERI; VGETEXP is instantiated unconditionally
// here (base AVX512F).
8806 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
8807 SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
8808 defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
8809 sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
8810 defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
8811 sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
8814 let Predicates = [HasERI] in {
8815 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
8816 SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
8817 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
8818 SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
8821 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
8822 SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
8823 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed 28-bit-precision approximations: register (r), full-vector memory
// (m) and broadcast (mb, EVEX_B) forms, all maskable; may raise FP
// exceptions and reads MXCSR.
// NOTE(review): embedded line numbers skip (8831-8832, 8835) -- the
// Sched<...> line of the r form and the pattern head of the m form appear
// to be missing from this extraction.
8825 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8826 SDNode OpNode, X86FoldableSchedWrite sched> {
8827 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8828 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8829 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8830 (OpNode (_.VT _.RC:$src))>,
8833 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8834 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8836 (bitconvert (_.LdFrag addr:$src))))>,
8837 Sched<[sched.Folded, sched.ReadAfterFold]>;
8839 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8840 (ins _.ScalarMemOp:$src), OpcodeStr,
8841 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8843 (_.BroadcastLdFrag addr:$src)))>,
8844 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// {sae} register form of the packed 28-bit approximations (EVEX.b set),
// matched via the SAE node passed as OpNode by avx512_eri below.
8847 multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8848 SDNode OpNode, X86FoldableSchedWrite sched> {
8849 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
8850 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8851 (ins _.RC:$src), OpcodeStr,
8852 "{sae}, $src", "$src, {sae}",
8853 (OpNode (_.VT _.RC:$src))>,
8854 EVEX_B, Sched<[sched]>;
// 512-bit-only ER packed ops: combine the normal and SAE packed forms for
// the ps (v16f32) and pd (v8f64) flavors.
8857 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
8858 SDNode OpNodeSAE, X86SchedWriteWidths sched> {
8859 defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
8860 avx512_fp28_p_sae<opc, OpcodeStr#"ps", v16f32_info, OpNodeSAE, sched.ZMM>,
8861 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
8862 defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
8863 avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
8864 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// VLX-only 128/256-bit expansion of a packed unary fp op (used by VGETEXP
// below); the 512-bit forms come from avx512_eri.  Then the ERI and GETEXP
// instantiations.
// NOTE(review): embedded line numbers skip (8872, 8875, 8878, 8881, 8895)
// -- the sched.XMM/sched.YMM argument lines and part of the VGETEXP defm
// appear to be missing from this extraction.
8867 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
8868 SDNode OpNode, X86SchedWriteWidths sched> {
8869 // Define only if AVX512VL feature is present.
8870 let Predicates = [HasVLX] in {
8871 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode,
8873 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
8874 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode,
8876 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
8877 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
8879 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8880 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
8882 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
8886 let Predicates = [HasERI] in {
8887 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
8888 SchedWriteFRsqrt>, EVEX;
8889 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
8890 SchedWriteFRcp>, EVEX;
8891 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
8892 SchedWriteFAdd>, EVEX;
8894 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
8896 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
8897 SchedWriteFRnd>, EVEX;
// Packed sqrt with explicit static rounding control ($rc immediate,
// EVEX_RC); matched via X86fsqrtRnd.  512-bit only (see the _all_round
// expansion below).
8899 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
8900 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8901 let ExeDomain = _.ExeDomain in
8902 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8903 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
8904 (_.VT (X86fsqrtRnd _.RC:$src, (i32 timm:$rc)))>,
8905 EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
// Packed sqrt: register (r), memory (m) and broadcast (mb) forms.  Uses
// AVX512_maskable_split so the unmasked pattern matches any_fsqrt
// (strict-fp capable) while the masked pattern matches plain fsqrt.
// NOTE(review): embedded line number 8915 is missing -- likely the
// Sched<[sched]> line of the r form.
8908 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
8909 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
8910 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
8911 defm r: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
8912 (ins _.RC:$src), OpcodeStr, "$src", "$src",
8913 (_.VT (any_fsqrt _.RC:$src)),
8914 (_.VT (fsqrt _.RC:$src))>, EVEX,
8916 defm m: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8917 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
8918 (any_fsqrt (_.VT (_.LdFrag addr:$src))),
8919 (fsqrt (_.VT (_.LdFrag addr:$src)))>, EVEX,
8920 Sched<[sched.Folded, sched.ReadAfterFold]>;
8921 defm mb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
8922 (ins _.ScalarMemOp:$src), OpcodeStr,
8923 "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
8924 (any_fsqrt (_.VT (_.BroadcastLdFrag addr:$src))),
8925 (fsqrt (_.VT (_.BroadcastLdFrag addr:$src)))>,
8926 EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expand packed sqrt across widths: 512-bit always, 128/256 under VLX
// (avx512_sqrt_packed_all), and the 512-bit-only rounding-control forms
// (avx512_sqrt_packed_all_round).
8930 let Uses = [MXCSR], mayRaiseFPException = 1 in
8931 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
8932 X86SchedWriteSizes sched> {
8933 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8934 sched.PS.ZMM, v16f32_info>,
8935 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8936 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8937 sched.PD.ZMM, v8f64_info>,
8938 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8939 // Define only if AVX512VL feature is present.
8940 let Predicates = [HasVLX] in {
8941 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8942 sched.PS.XMM, v4f32x_info>,
8943 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
8944 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
8945 sched.PS.YMM, v8f32x_info>,
8946 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
8947 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8948 sched.PD.XMM, v2f64x_info>,
8949 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8950 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
8951 sched.PD.YMM, v4f64x_info>,
8952 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
8956 let Uses = [MXCSR] in
8957 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
8958 X86SchedWriteSizes sched> {
8959 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
8960 sched.PS.ZMM, v16f32_info>,
8961 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
8962 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
8963 sched.PD.ZMM, v8f64_info>,
8964 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar sqrt: intrinsic forms (r_Int/m_Int/rb_Int with rounding control)
// plus isCodeGenOnly FRC register forms (r/m) that carry no patterns; the
// trailing Pats select those FRC forms for plain scalar any_fsqrt, using
// IMPLICIT_DEF for the unused pass-through operand.  The load fold is
// OptForSize-only.
// NOTE(review): embedded line numbers skip (8987-8988) -- the second
// operand line of the X86fsqrtRnds pattern appears to be missing from this
// extraction.
8967 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
8968 X86VectorVTInfo _, string Name> {
8969 let ExeDomain = _.ExeDomain in {
8970 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8971 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
8972 "$src2, $src1", "$src1, $src2",
8973 (X86fsqrts (_.VT _.RC:$src1),
8974 (_.VT _.RC:$src2))>,
8975 Sched<[sched]>, SIMD_EXC;
8976 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
8977 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
8978 "$src2, $src1", "$src1, $src2",
8979 (X86fsqrts (_.VT _.RC:$src1),
8980 (_.ScalarIntMemFrags addr:$src2))>,
8981 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
8982 let Uses = [MXCSR] in
8983 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
8984 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
8985 "$rc, $src2, $src1", "$src1, $src2, $rc",
8986 (X86fsqrtRnds (_.VT _.RC:$src1),
8989 EVEX_B, EVEX_RC, Sched<[sched]>;
8991 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
8992 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
8993 (ins _.FRC:$src1, _.FRC:$src2),
8994 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
8995 Sched<[sched]>, SIMD_EXC;
8997 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
8998 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
8999 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9000 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9004 let Predicates = [HasAVX512] in {
9005 def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
9006 (!cast<Instruction>(Name#Zr)
9007 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
9010 let Predicates = [HasAVX512, OptForSize] in {
9011 def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
9012 (!cast<Instruction>(Name#Zm)
9013 (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
// SS/SD expansion of scalar sqrt, then the VSQRT instantiations (packed
// all-widths + rounding, and scalar).
9017 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
9018 X86SchedWriteSizes sched> {
9019 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
9020 EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
9021 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
9022 EVEX_CD8<64, CD8VT1>, XD, VEX_W;
// Scalar vrndscale: intrinsic r_Int/rb_Int({sae})/m_Int forms plus
// isCodeGenOnly FRC forms, with Pats selecting the FRC forms for
// X86any_VRndScale (load fold gated on OptForSize).  Instantiated for SS
// (0x0A) and SD (0x0B).
// NOTE(review): embedded line numbers skip (9046-9047) -- the Sched line
// of rb_Int appears to be missing from this extraction.
9030 multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
9031 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
9032 let ExeDomain = _.ExeDomain in {
9033 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9034 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9035 "$src3, $src2, $src1", "$src1, $src2, $src3",
9036 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9037 (i32 timm:$src3)))>,
9038 Sched<[sched]>, SIMD_EXC;
9040 let Uses = [MXCSR] in
9041 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9042 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
9043 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
9044 (_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
9045 (i32 timm:$src3)))>, EVEX_B,
9048 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9049 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
9051 "$src3, $src2, $src1", "$src1, $src2, $src3",
9052 (_.VT (X86RndScales _.RC:$src1,
9053 (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3)))>,
9054 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9056 let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
9057 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
9058 (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3),
9059 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9060 []>, Sched<[sched]>, SIMD_EXC;
9063 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
9064 (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9065 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9066 []>, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
9070 let Predicates = [HasAVX512] in {
9071 def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2),
9072 (_.EltVT (!cast<Instruction>(NAME#r) (_.EltVT (IMPLICIT_DEF)),
9073 _.FRC:$src1, timm:$src2))>;
9076 let Predicates = [HasAVX512, OptForSize] in {
9077 def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
9078 (_.EltVT (!cast<Instruction>(NAME#m) (_.EltVT (IMPLICIT_DEF)),
9079 addr:$src1, timm:$src2))>;
9083 defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
9084 SchedWriteFRnd.Scl, f32x_info>,
9085 AVX512AIi8Base, EVEX_4V, VEX_LIG,
9086 EVEX_CD8<32, CD8VT1>;
9088 defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
9089 SchedWriteFRnd.Scl, f64x_info>,
9090 VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
9091 EVEX_CD8<64, CD8VT1>;
// Pattern-only multiclass: rewrite a Move/scalar_to_vector/X86selects_mask
// combination into the masked (r_Intk) or zero-masked (r_Intkz) intrinsic
// form of a scalar op.  Instantiated below for masked scalar sqrt (SS/SD)
// with a GR32-derived mask copied into VK1WM.
9093 multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
9094 dag Mask, X86VectorVTInfo _, PatLeaf ZeroFP,
9095 dag OutMask, Predicate BasePredicate> {
9096 let Predicates = [BasePredicate] in {
9097 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9098 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9099 (extractelt _.VT:$dst, (iPTR 0))))),
9100 (!cast<Instruction>("V"#OpcPrefix#r_Intk)
9101 _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>;
9103 def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask,
9104 (OpNode (extractelt _.VT:$src2, (iPTR 0))),
9106 (!cast<Instruction>("V"#OpcPrefix#r_Intkz)
9107 OutMask, _.VT:$src2, _.VT:$src1)>;
9111 defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
9112 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
9113 fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9114 defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
9115 (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
9116 fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
9119 //-------------------------------------------------
9120 // Integer truncate and extend operations
9121 //-------------------------------------------------
9123 // PatFrags that contain a select and a truncate op. They take operands in the
9124 // same order as X86vmtrunc, X86vmtruncs, X86vmtruncus. This allows us to pass
9125 // either to the multiclasses.
// Masked-truncate PatFrags: plain truncation, signed saturation
// (X86vtruncs) and unsigned saturation (X86vtruncus), each merged with
// $src0 under $mask via vselect_mask.
9126 def select_trunc : PatFrag<(ops node:$src, node:$src0, node:$mask),
9127 (vselect_mask node:$mask,
9128 (trunc node:$src), node:$src0)>;
9129 def select_truncs : PatFrag<(ops node:$src, node:$src0, node:$mask),
9130 (vselect_mask node:$mask,
9131 (X86vtruncs node:$src), node:$src0)>;
9132 def select_truncus : PatFrag<(ops node:$src, node:$src0, node:$mask),
9133 (vselect_mask node:$mask,
9134 (X86vtruncus node:$src), node:$src0)>;
// Register and store forms shared by all VPMOV* truncate instructions:
// rr (unmasked), rrk (merge-masked via MaskNode + $src0), rrkz (zero-masked),
// plus pattern-less mr/mrk memory-destination forms (patterns are attached
// separately by avx512_trunc_mr_lowering).
9136 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9137 SDPatternOperator MaskNode,
9138 X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo,
9139 X86VectorVTInfo DestInfo, X86MemOperand x86memop> {
9140 let ExeDomain = DestInfo.ExeDomain in {
9141 def rr : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9142 (ins SrcInfo.RC:$src),
9143 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9144 [(set DestInfo.RC:$dst,
9145 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))))]>,
9146 EVEX, Sched<[sched]>;
// Merge-masked form ties the passthru operand to the destination register.
9147 let Constraints = "$src0 = $dst" in
9148 def rrk : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9149 (ins DestInfo.RC:$src0, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9150 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9151 [(set DestInfo.RC:$dst,
9152 (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9153 (DestInfo.VT DestInfo.RC:$src0),
9154 SrcInfo.KRCWM:$mask))]>,
9155 EVEX, EVEX_K, Sched<[sched]>;
// Zero-masked form: masked-off destination elements become zero.
9156 def rrkz : AVX512XS8I<opc, MRMDestReg, (outs DestInfo.RC:$dst),
9157 (ins SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9158 OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
9159 [(set DestInfo.RC:$dst,
9160 (DestInfo.VT (MaskNode (SrcInfo.VT SrcInfo.RC:$src),
9161 DestInfo.ImmAllZerosV, SrcInfo.KRCWM:$mask)))]>,
9162 EVEX, EVEX_KZ, Sched<[sched]>;
// Memory-destination (truncating-store) forms; no ISel patterns here.
9165 let mayStore = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in {
9166 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
9167 (ins x86memop:$dst, SrcInfo.RC:$src),
9168 OpcodeStr # "\t{$src, $dst|$dst, $src}", []>,
9169 EVEX, Sched<[sched.Folded]>;
9171 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
9172 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
9173 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
9174 EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
9175 }//mayStore = 1, hasSideEffects = 0
// Maps truncating-store and masked-truncating-store DAG patterns onto the
// mr/mrk instruction forms created by avx512_trunc_common.
// NOTE(review): the parameter list looks truncated in this copy -- the
// trailing "string Name" parameter used below is not visible here.
9178 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
9179 X86VectorVTInfo DestInfo,
9180 PatFrag truncFrag, PatFrag mtruncFrag,
9183 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
9184 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mr)
9185 addr:$dst, SrcInfo.RC:$src)>;
9187 def : Pat<(mtruncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst,
9188 SrcInfo.KRCWM:$mask),
9189 (!cast<Instruction>(Name#SrcInfo.ZSuffix#mrk)
9190 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates the 128/256/512-bit variants of a truncate instruction.
// Z128/Z256 require VLX; each width takes its own op/mask node because the
// narrow widths use the "in-vector" node variants.
9193 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
9194 SDNode OpNode256, SDNode OpNode512,
9195 SDPatternOperator MaskNode128,
9196 SDPatternOperator MaskNode256,
9197 SDPatternOperator MaskNode512,
9198 X86FoldableSchedWrite sched,
9199 AVX512VLVectorVTInfo VTSrcInfo,
9200 X86VectorVTInfo DestInfoZ128,
9201 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
9202 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
9203 X86MemOperand x86memopZ, PatFrag truncFrag,
9204 PatFrag mtruncFrag, Predicate prd = HasAVX512>{
9206 let Predicates = [HasVLX, prd] in {
9207 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
9208 VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
9209 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
9210 truncFrag, mtruncFrag, NAME>, EVEX_V128;
9212 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
9213 VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
9214 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
9215 truncFrag, mtruncFrag, NAME>, EVEX_V256;
9217 let Predicates = [prd] in
9218 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
9219 VTSrcInfo.info512, DestInfoZ, x86memopZ>,
9220 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
9221 truncFrag, mtruncFrag, NAME>, EVEX_V512;
// Per-element-size truncate instantiations. The qb/qw/qd forms truncate from
// i64 elements; db/dw from i32; wb from i16 (wb requires BWI). The widths
// that cannot use the generic trunc node take the "in-vector" node instead.
// i64 -> i8
9224 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9225 SDPatternOperator MaskNode,
9226 X86FoldableSchedWrite sched, PatFrag StoreNode,
9227 PatFrag MaskedStoreNode, SDNode InVecNode,
9228 SDPatternOperator InVecMaskNode> {
9229 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode,
9230 InVecMaskNode, InVecMaskNode, InVecMaskNode, sched,
9231 avx512vl_i64_info, v16i8x_info, v16i8x_info,
9232 v16i8x_info, i16mem, i32mem, i64mem, StoreNode,
9233 MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// i64 -> i16
9236 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9237 SDPatternOperator MaskNode,
9238 X86FoldableSchedWrite sched, PatFrag StoreNode,
9239 PatFrag MaskedStoreNode, SDNode InVecNode,
9240 SDPatternOperator InVecMaskNode> {
9241 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9242 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9243 avx512vl_i64_info, v8i16x_info, v8i16x_info,
9244 v8i16x_info, i32mem, i64mem, i128mem, StoreNode,
9245 MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// i64 -> i32
9248 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
9249 SDPatternOperator MaskNode,
9250 X86FoldableSchedWrite sched, PatFrag StoreNode,
9251 PatFrag MaskedStoreNode, SDNode InVecNode,
9252 SDPatternOperator InVecMaskNode> {
9253 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9254 InVecMaskNode, MaskNode, MaskNode, sched,
9255 avx512vl_i64_info, v4i32x_info, v4i32x_info,
9256 v8i32x_info, i64mem, i128mem, i256mem, StoreNode,
9257 MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// i32 -> i8
9260 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
9261 SDPatternOperator MaskNode,
9262 X86FoldableSchedWrite sched, PatFrag StoreNode,
9263 PatFrag MaskedStoreNode, SDNode InVecNode,
9264 SDPatternOperator InVecMaskNode> {
9265 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode,
9266 InVecMaskNode, InVecMaskNode, MaskNode, sched,
9267 avx512vl_i32_info, v16i8x_info, v16i8x_info,
9268 v16i8x_info, i32mem, i64mem, i128mem, StoreNode,
9269 MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// i32 -> i16
9272 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9273 SDPatternOperator MaskNode,
9274 X86FoldableSchedWrite sched, PatFrag StoreNode,
9275 PatFrag MaskedStoreNode, SDNode InVecNode,
9276 SDPatternOperator InVecMaskNode> {
9277 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9278 InVecMaskNode, MaskNode, MaskNode, sched,
9279 avx512vl_i32_info, v8i16x_info, v8i16x_info,
9280 v16i16x_info, i64mem, i128mem, i256mem, StoreNode,
9281 MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// i16 -> i8 (BWI only)
9284 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9285 SDPatternOperator MaskNode,
9286 X86FoldableSchedWrite sched, PatFrag StoreNode,
9287 PatFrag MaskedStoreNode, SDNode InVecNode,
9288 SDPatternOperator InVecMaskNode> {
9289 defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode,
9290 InVecMaskNode, MaskNode, MaskNode, sched,
9291 avx512vl_i16_info, v16i8x_info, v16i8x_info,
9292 v32i8x_info, i64mem, i128mem, i256mem, StoreNode,
9293 MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// VPMOV{,S,US}* truncate instruction definitions: plain, signed-saturating
// and unsigned-saturating variants for every narrowing combination.
// NOTE(review): several of the defms below end on a dangling operand (e.g.
// "X86vtruncs," with no closing ">;") -- lines appear to have been dropped
// from this copy; verify against upstream.
9296 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
9297 WriteShuffle256, truncstorevi8,
9298 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9299 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
9300 WriteShuffle256, truncstore_s_vi8,
9301 masked_truncstore_s_vi8, X86vtruncs,
9303 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
9304 select_truncus, WriteShuffle256,
9305 truncstore_us_vi8, masked_truncstore_us_vi8,
9306 X86vtruncus, X86vmtruncus>;
9308 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
9309 WriteShuffle256, truncstorevi16,
9310 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9311 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, select_truncs,
9312 WriteShuffle256, truncstore_s_vi16,
9313 masked_truncstore_s_vi16, X86vtruncs,
9315 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
9316 select_truncus, WriteShuffle256,
9317 truncstore_us_vi16, masked_truncstore_us_vi16,
9318 X86vtruncus, X86vmtruncus>;
9320 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, select_trunc,
9321 WriteShuffle256, truncstorevi32,
9322 masked_truncstorevi32, X86vtrunc, X86vmtrunc>;
9323 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, select_truncs,
9324 WriteShuffle256, truncstore_s_vi32,
9325 masked_truncstore_s_vi32, X86vtruncs,
9327 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
9328 select_truncus, WriteShuffle256,
9329 truncstore_us_vi32, masked_truncstore_us_vi32,
9330 X86vtruncus, X86vmtruncus>;
9332 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, select_trunc,
9333 WriteShuffle256, truncstorevi8,
9334 masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
9335 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, select_truncs,
9336 WriteShuffle256, truncstore_s_vi8,
9337 masked_truncstore_s_vi8, X86vtruncs,
9339 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
9340 select_truncus, WriteShuffle256,
9341 truncstore_us_vi8, masked_truncstore_us_vi8,
9342 X86vtruncus, X86vmtruncus>;
9344 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, select_trunc,
9345 WriteShuffle256, truncstorevi16,
9346 masked_truncstorevi16, X86vtrunc, X86vmtrunc>;
9347 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, select_truncs,
9348 WriteShuffle256, truncstore_s_vi16,
9349 masked_truncstore_s_vi16, X86vtruncs,
9351 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
9352 select_truncus, WriteShuffle256,
9353 truncstore_us_vi16, masked_truncstore_us_vi16,
9354 X86vtruncus, X86vmtruncus>;
9356 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, select_trunc,
9357 WriteShuffle256, truncstorevi8,
9358 masked_truncstorevi8, X86vtrunc,
9360 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, select_truncs,
9361 WriteShuffle256, truncstore_s_vi8,
9362 masked_truncstore_s_vi8, X86vtruncs,
9364 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
9365 select_truncus, WriteShuffle256,
9366 truncstore_us_vi8, masked_truncstore_us_vi8,
9367 X86vtruncus, X86vmtruncus>;
// Without VLX the 256-bit truncates are implemented by widening the source
// into a 512-bit register, using the Z-form instruction, and extracting the
// low subregister of the result.
9369 let Predicates = [HasAVX512, NoVLX] in {
9370 def: Pat<(v8i16 (trunc (v8i32 VR256X:$src))),
9371 (v8i16 (EXTRACT_SUBREG
9372 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
9373 VR256X:$src, sub_ymm)))), sub_xmm))>;
9374 def: Pat<(v4i32 (trunc (v4i64 VR256X:$src))),
9375 (v4i32 (EXTRACT_SUBREG
9376 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
9377 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for the i16->i8 truncate when only BWI (no VLX) is on.
9380 let Predicates = [HasBWI, NoVLX] in {
9381 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9382 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
9383 VR256X:$src, sub_ymm))), sub_xmm))>;
9386 // Without BWI we can't use vXi16/vXi8 vselect so we have to use vmtrunc nodes.
// Maps the masked-truncate nodes directly onto rrk/rrkz instruction forms.
// NOTE(review): both result dags below appear truncated in this copy (the
// merge pattern is missing its $src0 operand line and both are missing the
// trailing $src operand) -- verify against upstream.
9387 multiclass mtrunc_lowering<string InstrName, SDNode OpNode,
9388 X86VectorVTInfo DestInfo,
9389 X86VectorVTInfo SrcInfo> {
9390 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9392 SrcInfo.KRCWM:$mask)),
9393 (!cast<Instruction>(InstrName#"rrk") DestInfo.RC:$src0,
9394 SrcInfo.KRCWM:$mask,
9397 def : Pat<(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src),
9398 DestInfo.ImmAllZerosV,
9399 SrcInfo.KRCWM:$mask)),
9400 (!cast<Instruction>(InstrName#"rrkz") SrcInfo.KRCWM:$mask,
// Masked-truncate lowerings for the element combinations that need the
// vmtrunc nodes: 256-bit d->w forms under VLX, 512-bit d->w, d->b and q->w
// forms under plain AVX512.
9404 let Predicates = [HasVLX] in {
9405 defm : mtrunc_lowering<"VPMOVDWZ256", X86vmtrunc, v8i16x_info, v8i32x_info>;
9406 defm : mtrunc_lowering<"VPMOVSDWZ256", X86vmtruncs, v8i16x_info, v8i32x_info>;
9407 defm : mtrunc_lowering<"VPMOVUSDWZ256", X86vmtruncus, v8i16x_info, v8i32x_info>;
9410 let Predicates = [HasAVX512] in {
9411 defm : mtrunc_lowering<"VPMOVDWZ", X86vmtrunc, v16i16x_info, v16i32_info>;
9412 defm : mtrunc_lowering<"VPMOVSDWZ", X86vmtruncs, v16i16x_info, v16i32_info>;
9413 defm : mtrunc_lowering<"VPMOVUSDWZ", X86vmtruncus, v16i16x_info, v16i32_info>;
9415 defm : mtrunc_lowering<"VPMOVDBZ", X86vmtrunc, v16i8x_info, v16i32_info>;
9416 defm : mtrunc_lowering<"VPMOVSDBZ", X86vmtruncs, v16i8x_info, v16i32_info>;
9417 defm : mtrunc_lowering<"VPMOVUSDBZ", X86vmtruncus, v16i8x_info, v16i32_info>;
9419 defm : mtrunc_lowering<"VPMOVQWZ", X86vmtrunc, v8i16x_info, v8i64_info>;
9420 defm : mtrunc_lowering<"VPMOVSQWZ", X86vmtruncs, v8i16x_info, v8i64_info>;
9421 defm : mtrunc_lowering<"VPMOVUSQWZ", X86vmtruncus, v8i16x_info, v8i64_info>;
// Register and load forms shared by the VPMOVSX/VPMOVZX extend instructions,
// built with the AVX512_maskable helper so each gets masked variants too.
9424 multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
9425 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
9426 X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
9427 let ExeDomain = DestInfo.ExeDomain in {
9428 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
9429 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
9430 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
9431 EVEX, Sched<[sched]>;
// Memory form matches the extending-load fragment directly.
9433 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
9434 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
9435 (DestInfo.VT (LdFrag addr:$src))>,
9436 EVEX, Sched<[sched.Folded]>;
// Per-width VPMOVSX/VPMOVZX instantiations. Naming convention: the two
// letters give source/dest element sizes (B=i8, W=i16, D=i32, Q=i64). The
// narrowest widths use InVecNode (only the low subvector of the source is
// consumed); wider ones use the plain extend OpNode.
// i8 -> i16 (BWI)
9440 multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr,
9441 SDNode OpNode, SDNode InVecNode, string ExtTy,
9442 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9443 let Predicates = [HasVLX, HasBWI] in {
9444 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info,
9445 v16i8x_info, i64mem, LdFrag, InVecNode>,
9446 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9448 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info,
9449 v16i8x_info, i128mem, LdFrag, OpNode>,
9450 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9452 let Predicates = [HasBWI] in {
9453 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info,
9454 v32i8x_info, i256mem, LdFrag, OpNode>,
9455 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i32
9459 multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
9460 SDNode OpNode, SDNode InVecNode, string ExtTy,
9461 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9462 let Predicates = [HasVLX, HasAVX512] in {
9463 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9464 v16i8x_info, i32mem, LdFrag, InVecNode>,
9465 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9467 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9468 v16i8x_info, i64mem, LdFrag, InVecNode>,
9469 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9471 let Predicates = [HasAVX512] in {
9472 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9473 v16i8x_info, i128mem, LdFrag, OpNode>,
9474 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i8 -> i64
9478 multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
9479 SDNode OpNode, SDNode InVecNode, string ExtTy,
9480 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
9481 let Predicates = [HasVLX, HasAVX512] in {
9482 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9483 v16i8x_info, i16mem, LdFrag, InVecNode>,
9484 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
9486 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9487 v16i8x_info, i32mem, LdFrag, InVecNode>,
9488 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
9490 let Predicates = [HasAVX512] in {
9491 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9492 v16i8x_info, i64mem, LdFrag, InVecNode>,
9493 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i32
9497 multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr,
9498 SDNode OpNode, SDNode InVecNode, string ExtTy,
9499 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9500 let Predicates = [HasVLX, HasAVX512] in {
9501 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info,
9502 v8i16x_info, i64mem, LdFrag, InVecNode>,
9503 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
9505 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info,
9506 v8i16x_info, i128mem, LdFrag, OpNode>,
9507 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
9509 let Predicates = [HasAVX512] in {
9510 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info,
9511 v16i16x_info, i256mem, LdFrag, OpNode>,
9512 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
// i16 -> i64
9516 multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr,
9517 SDNode OpNode, SDNode InVecNode, string ExtTy,
9518 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
9519 let Predicates = [HasVLX, HasAVX512] in {
9520 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9521 v8i16x_info, i32mem, LdFrag, InVecNode>,
9522 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
9524 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9525 v8i16x_info, i64mem, LdFrag, InVecNode>,
9526 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
9528 let Predicates = [HasAVX512] in {
9529 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9530 v8i16x_info, i128mem, LdFrag, OpNode>,
9531 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
// i32 -> i64 (no VEX_WIG: W bit is meaningful here)
9535 multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
9536 SDNode OpNode, SDNode InVecNode, string ExtTy,
9537 X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
9539 let Predicates = [HasVLX, HasAVX512] in {
9540 defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
9541 v4i32x_info, i64mem, LdFrag, InVecNode>,
9542 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
9544 defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info,
9545 v4i32x_info, i128mem, LdFrag, OpNode>,
9546 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
9548 let Predicates = [HasAVX512] in {
9549 defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info,
9550 v8i32x_info, i256mem, LdFrag, OpNode>,
9551 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extend (0x30-0x35) and sign-extend (0x20-0x25) instruction families.
9555 defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
9556 defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
9557 defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
9558 defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
9559 defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
9560 defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
9562 defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
9563 defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
9564 defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
9565 defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
9566 defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
9567 defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
9570 // Patterns that we also need any extend versions of. aext_vector_inreg
9571 // is currently legalized to zext_vector_inreg.
// Whole-vector extending-load patterns mapped onto the *rm forms; shared by
// the sign- and zero-extend families via the ExtOp parameter.
9572 multiclass AVX512_pmovx_patterns_base<string OpcPrefix, SDNode ExtOp> {
9574 let Predicates = [HasVLX, HasBWI] in {
9575 def : Pat<(v16i16 (ExtOp (loadv16i8 addr:$src))),
9576 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
9579 let Predicates = [HasVLX] in {
9580 def : Pat<(v8i32 (ExtOp (loadv8i16 addr:$src))),
9581 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
9583 def : Pat<(v4i64 (ExtOp (loadv4i32 addr:$src))),
9584 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
9588 let Predicates = [HasBWI] in {
9589 def : Pat<(v32i16 (ExtOp (loadv32i8 addr:$src))),
9590 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
9592 let Predicates = [HasAVX512] in {
9593 def : Pat<(v16i32 (ExtOp (loadv16i8 addr:$src))),
9594 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
9595 def : Pat<(v16i32 (ExtOp (loadv16i16 addr:$src))),
9596 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
9598 def : Pat<(v8i64 (ExtOp (loadv8i16 addr:$src))),
9599 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
9601 def : Pat<(v8i64 (ExtOp (loadv8i32 addr:$src))),
9602 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Extend patterns whose source is a scalar load widened by scalar_to_vector
// (plus the X86vzload forms). These always use InVecOp since only the low
// part of the source vector is consumed. Inherits the whole-vector load
// patterns from AVX512_pmovx_patterns_base.
9606 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
                                 SDNode InVecOp> :
9608 AVX512_pmovx_patterns_base<OpcPrefix, ExtOp> {
// 128-bit destinations.
9610 let Predicates = [HasVLX, HasBWI] in {
9611 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9612 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9613 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9614 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
9615 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9616 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
}
9618 let Predicates = [HasVLX] in {
9619 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9620 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9621 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9622 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
9624 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (extloadi32i16 addr:$src)))))),
9625 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
9627 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9628 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9629 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9630 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9631 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9632 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
9634 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9635 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9636 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (X86vzload32 addr:$src))))),
9637 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
9639 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9640 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9641 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9642 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
9643 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))),
9644 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
}
// 256-bit destinations.
9646 let Predicates = [HasVLX] in {
9647 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9648 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
// Fixed: scalar_to_vector of an f64 load yields v2f64, not v2i64 (matches
// every other loadf64 pattern in this multiclass).
9649 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9650 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9651 def : Pat<(v8i32 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9652 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
9654 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
9655 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9656 def : Pat<(v4i64 (InVecOp (bc_v16i8 (v4i32 (X86vzload32 addr:$src))))),
9657 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
9659 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9660 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
// Fixed: v2f64 for the loadf64 form (was v2i64).
9661 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9662 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
9663 def : Pat<(v4i64 (InVecOp (bc_v8i16 (v2i64 (X86vzload64 addr:$src))))),
9664 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
}
// 512-bit destinations.
9667 let Predicates = [HasAVX512] in {
9668 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
9669 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9670 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
9671 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
9672 def : Pat<(v8i64 (InVecOp (bc_v16i8 (v2i64 (X86vzload64 addr:$src))))),
9673 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
}
}
// Instantiate the extend-load pattern sets for both sign and zero extension.
9677 defm : AVX512_pmovx_patterns<"VPMOVSX", sext, sext_invec>;
9678 defm : AVX512_pmovx_patterns<"VPMOVZX", zext, zext_invec>;
9680 // Without BWI we can't do a trunc from v16i16 to v16i8. DAG combine can merge
9681 // ext+trunc aggressively making it impossible to legalize the DAG to this
9682 // pattern directly.
// Implemented as zext to v16i32 followed by a d->b truncate.
9683 let Predicates = [HasAVX512, NoBWI] in {
9684 def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))),
9685 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrr VR256X:$src)))>;
9686 def: Pat<(v16i8 (trunc (loadv16i16 addr:$src))),
9687 (VPMOVDBZrr (v16i32 (VPMOVZXWDZrm addr:$src)))>;
9690 //===----------------------------------------------------------------------===//
9691 // GATHER - SCATTER Operations
9693 // FIXME: Improve scheduling of gather/scatter instructions.
// Single gather instruction form: $dst is early-clobber and tied to $src1
// (merge semantics), and the mask register is written back ($mask_wb) as the
// hardware clears completed lanes. No ISel pattern; selected manually.
9694 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9695 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9696 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
9697 ExeDomain = _.ExeDomain, mayLoad = 1, hasSideEffects = 0 in
9698 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, MaskRC:$mask_wb),
9699 (ins _.RC:$src1, MaskRC:$mask, memop:$src2),
9700 !strconcat(OpcodeStr#_.Suffix,
9701 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
9702 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
// Gathers with 64-bit data elements (pd/q): dword-indexed and qword-indexed
// variants at each vector width; 128/256-bit forms require VLX.
9705 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
9706 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9707 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
9708 vy512xmem>, EVEX_V512, VEX_W;
9709 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
9710 vz512mem>, EVEX_V512, VEX_W;
9711 let Predicates = [HasVLX] in {
9712 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9713 vx256xmem>, EVEX_V256, VEX_W;
9714 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
9715 vy256xmem>, EVEX_V256, VEX_W;
9716 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9717 vx128xmem>, EVEX_V128, VEX_W;
9718 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9719 vx128xmem>, EVEX_V128, VEX_W;
// Gathers with 32-bit data elements (ps/d). Qword-indexed forms produce a
// half-width result; the Z128 q-form uses VK2WM since only 2 elements load.
9723 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
9724 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9725 defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9727 defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9729 let Predicates = [HasVLX] in {
9730 defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
9731 vy256xmem>, EVEX_V256;
9732 defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9733 vy128xmem>, EVEX_V256;
9734 defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
9735 vx128xmem>, EVEX_V128;
9736 defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
9737 vx64xmem, VK2WM>, EVEX_V128;
// FP gathers (0x92/0x93) and integer gathers (0x90/0x91).
9742 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
9743 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
9745 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
9746 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Single scatter instruction form: only the mask is written back ($mask_wb,
// hardware clears completed lanes). No ISel pattern; selected manually.
9748 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
9749 X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
9751 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
9752 hasSideEffects = 0 in
9754 def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
9755 (ins memop:$dst, MaskRC:$mask, _.RC:$src),
9756 !strconcat(OpcodeStr#_.Suffix,
9757 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
9758 []>, EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9759 Sched<[WriteStore]>;
// Scatters with 64-bit data elements; mirrors avx512_gather_q_pd.
9762 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
9763 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9764 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
9765 vy512xmem>, EVEX_V512, VEX_W;
9766 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
9767 vz512mem>, EVEX_V512, VEX_W;
9768 let Predicates = [HasVLX] in {
9769 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9770 vx256xmem>, EVEX_V256, VEX_W;
9771 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
9772 vy256xmem>, EVEX_V256, VEX_W;
9773 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9774 vx128xmem>, EVEX_V128, VEX_W;
9775 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9776 vx128xmem>, EVEX_V128, VEX_W;
// Scatters with 32-bit data elements; mirrors avx512_gather_d_ps (the Z128
// q-form stores only 2 elements, hence VK2WM).
9780 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
9781 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
9782 defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512, vz512mem>,
9784 defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info256, vz256mem>,
9786 let Predicates = [HasVLX] in {
9787 defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
9788 vy256xmem>, EVEX_V256;
9789 defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9790 vy128xmem>, EVEX_V256;
9791 defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
9792 vx128xmem>, EVEX_V128;
9793 defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
9794 vx64xmem, VK2WM>, EVEX_V128;
// FP scatters (0xA2/0xA3) and integer scatters (0xA0/0xA1).
9798 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
9799 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
9801 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
9802 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// PFI gather/scatter prefetch hints: masked, memory-operand-only, no result.
// Marked mayLoad and mayStore since the opcode family covers both directions.
9805 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
9806 RegisterClass KRC, X86MemOperand memop> {
9807 let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
9808 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
9809 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
9810 EVEX, EVEX_K, Sched<[WriteLoad]>;
// Prefetch hint instantiations: gather hints use reg /1 (PF0) and /2 (PF1),
// scatter hints use /5 (PF0) and /6 (PF1); 0xC6 = dword-indexed, 0xC7 =
// qword-indexed opcodes.
9813 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
9814 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9816 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
9817 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9819 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
9820 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9822 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
9823 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9825 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
9826 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9828 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
9829 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9831 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
9832 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9834 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
9835 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9837 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
9838 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9840 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
9841 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9843 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
9844 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9846 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
9847 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
9849 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
9850 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
9852 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
9853 VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
9855 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
9856 VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
9858 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
9859 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Mask -> vector moves (VPMOVM2*): sign-extend each mask bit into a full
// destination element; the register form is matched as (sext KRC:$src).
9861 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
9862 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
9863 !strconcat(OpcodeStr#Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
9864 [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>,
9865 EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc?
// Instantiate all three vector widths: 512-bit under the base predicate,
// 128/256-bit additionally gated on HasVLX.
9868 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
9869 string OpcodeStr, Predicate prd> {
9870 let Predicates = [prd] in
9871 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
9873 let Predicates = [prd, HasVLX] in {
9874 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
9875 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
// Byte/word forms require BWI, dword/qword forms require DQI; the 64-bit
// element forms additionally set VEX_W.
9879 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
9880 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
9881 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
9882 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Vector -> mask moves (VPMOV*2M): copy each element's sign bit into a mask
// register.  Matched as "0 > x", i.e. X86pcmpgtm with an all-zeros LHS.
9884 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
9885 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
9886 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
9887 [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))]>,
9888 EVEX, Sched<[WriteMove]>;
9891 // Use 512bit version to implement 128/256 bit in case NoVLX.
9892 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
// Widen the narrow source into an undef 512-bit register, run the Z-width
// instruction, and copy the result back to the narrow mask class.
9896 def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
9897 (_.KVT (COPY_TO_REGCLASS
9898 (!cast<Instruction>(Name#"Zrr")
9899 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
9900 _.RC:$src, _.SubRegIdx)),
9904 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
9905 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9906 let Predicates = [prd] in
9907 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
9910 let Predicates = [prd, HasVLX] in {
9911 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
9913 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
// Without VLX, select the widened 512-bit instruction for 128/256-bit inputs.
9916 let Predicates = [prd, NoVLX] in {
9917 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256, NAME>;
9918 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128, NAME>;
9922 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
9923 avx512vl_i8_info, HasBWI>;
9924 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
9925 avx512vl_i16_info, HasBWI>, VEX_W;
9926 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
9927 avx512vl_i32_info, HasDQI>;
9928 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
9929 avx512vl_i64_info, HasDQI>, VEX_W;
9931 // Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
9932 // is available, but BWI is not. We can't handle this in lowering because
9933 // a target independent DAG combine likes to combine sext and trunc.
// Strategy: widen the mask to v16i32 with VPMOVM2D (a DQI instruction), then
// truncate the dwords down to bytes/words with VPMOVDB/VPMOVDW.
9934 let Predicates = [HasDQI, NoBWI] in {
9935 def : Pat<(v16i8 (sext (v16i1 VK16:$src))),
9936 (VPMOVDBZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
9937 def : Pat<(v16i16 (sext (v16i1 VK16:$src))),
9938 (VPMOVDWZrr (v16i32 (VPMOVM2DZrr VK16:$src)))>;
// With VLX the same trick handles the 256-bit v8i1 -> v8i16 case.
9941 let Predicates = [HasDQI, NoBWI, HasVLX] in {
9942 def : Pat<(v8i16 (sext (v8i1 VK8:$src))),
9943 (VPMOVDWZ256rr (v8i32 (VPMOVM2DZ256rr VK8:$src)))>;
9946 //===----------------------------------------------------------------------===//
9947 // AVX-512 - COMPRESS and EXPAND
// rr: register-to-register compress (selection handled by separate patterns,
// hence null_frag here).
9950 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
9951 string OpcodeStr, X86FoldableSchedWrite sched> {
9952 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
9953 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
9954 (null_frag)>, AVX5128IBase,
// mr/mrk: compress-to-memory forms (plain and writemasked); pure stores, so
// no pattern and no side effects beyond the memory write.
9957 let mayStore = 1, hasSideEffects = 0 in
9958 def mr : AVX5128I<opc, MRMDestMem, (outs),
9959 (ins _.MemOp:$dst, _.RC:$src),
9960 OpcodeStr # "\t{$src, $dst|$dst, $src}",
9961 []>, EVEX_CD8<_.EltSize, CD8VT1>,
9962 Sched<[sched.Folded]>;
9964 def mrk : AVX5128I<opc, MRMDestMem, (outs),
9965 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
9966 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
9968 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
9969 Sched<[sched.Folded]>;
// Map the compressing-store and X86compress DAG nodes onto the instructions
// defined above (masked store -> mrk, merge-masked -> rrk, zero-masked -> rrkz).
9972 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
9973 def : Pat<(X86mCompressingStore (_.VT _.RC:$src), addr:$dst, _.KRCWM:$mask),
9974 (!cast<Instruction>(Name#_.ZSuffix#mrk)
9975 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
9977 def : Pat<(X86compress (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
9978 (!cast<Instruction>(Name#_.ZSuffix#rrk)
9979 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
9980 def : Pat<(X86compress (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
9981 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
9982 _.KRCWM:$mask, _.RC:$src)>;
// Instantiate all vector widths: 512-bit always, 128/256-bit under VLX.
9985 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
9986 X86FoldableSchedWrite sched,
9987 AVX512VLVectorVTInfo VTInfo,
9988 Predicate Pred = HasAVX512> {
9989 let Predicates = [Pred] in
9990 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>,
9991 compress_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
9993 let Predicates = [Pred, HasVLX] in {
9994 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>,
9995 compress_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
9996 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>,
9997 compress_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10001 // FIXME: Is there a better scheduler class for VPCOMPRESS?
10002 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
10003 avx512vl_i32_info>, EVEX, NotMemoryFoldable;
10004 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
10005 avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
10006 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
10007 avx512vl_f32_info>, EVEX, NotMemoryFoldable;
10008 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
10009 avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
// EXPAND: rr/rm register and memory forms (selection via separate patterns,
// hence null_frag).
10012 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
10013 string OpcodeStr, X86FoldableSchedWrite sched> {
10014 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10015 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
10016 (null_frag)>, AVX5128IBase,
10019 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10020 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
10022 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>,
10023 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Map expanding-load and X86expand nodes onto the instructions above.  Both
// the undef and the all-zeros passthru cases select the zero-masked (rmkz)
// form; a register passthru selects the merge-masked (rmk/rrk) form.
10026 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
10028 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
10029 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10030 _.KRCWM:$mask, addr:$src)>;
10032 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
10033 (!cast<Instruction>(Name#_.ZSuffix#rmkz)
10034 _.KRCWM:$mask, addr:$src)>;
10036 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
10037 (_.VT _.RC:$src0))),
10038 (!cast<Instruction>(Name#_.ZSuffix#rmk)
10039 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
10041 def : Pat<(X86expand (_.VT _.RC:$src), _.RC:$src0, _.KRCWM:$mask),
10042 (!cast<Instruction>(Name#_.ZSuffix#rrk)
10043 _.RC:$src0, _.KRCWM:$mask, _.RC:$src)>;
10044 def : Pat<(X86expand (_.VT _.RC:$src), _.ImmAllZerosV, _.KRCWM:$mask),
10045 (!cast<Instruction>(Name#_.ZSuffix#rrkz)
10046 _.KRCWM:$mask, _.RC:$src)>;
// Instantiate all vector widths: 512-bit always, 128/256-bit under VLX.
10049 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
10050 X86FoldableSchedWrite sched,
10051 AVX512VLVectorVTInfo VTInfo,
10052 Predicate Pred = HasAVX512> {
10053 let Predicates = [Pred] in
10054 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>,
10055 expand_by_vec_width_lowering<VTInfo.info512, NAME>, EVEX_V512;
10057 let Predicates = [Pred, HasVLX] in {
10058 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>,
10059 expand_by_vec_width_lowering<VTInfo.info256, NAME>, EVEX_V256;
10060 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>,
10061 expand_by_vec_width_lowering<VTInfo.info128, NAME>, EVEX_V128;
10065 // FIXME: Is there a better scheduler class for VPEXPAND?
10066 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
10067 avx512vl_i32_info>, EVEX;
10068 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
10069 avx512vl_i64_info>, EVEX, VEX_W;
10070 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
10071 avx512vl_f32_info>, EVEX;
10072 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
10073 avx512vl_f64_info>, EVEX, VEX_W;
10075 //handle instruction reg_vec1 = op(reg_vec,imm)
10077 // op(broadcast(eltVt),imm)
10078 //all instruction created with FROUND_CURRENT
// Unary packed FP op with an 8-bit immediate.  OpNode is the unmasked node,
// MaskOpNode the masked one; three forms: rri (reg), rmi (full-vector load),
// rmbi (broadcast of a scalar element, EVEX_B).  All may raise FP exceptions
// and read MXCSR.
10079 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr,
10080 SDNode OpNode, SDNode MaskOpNode,
10081 X86FoldableSchedWrite sched,
10082 X86VectorVTInfo _> {
10083 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10084 defm rri : AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
10085 (ins _.RC:$src1, i32u8imm:$src2),
10086 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10087 (OpNode (_.VT _.RC:$src1), (i32 timm:$src2)),
10088 (MaskOpNode (_.VT _.RC:$src1), (i32 timm:$src2))>,
10090 defm rmi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10091 (ins _.MemOp:$src1, i32u8imm:$src2),
10092 OpcodeStr#_.Suffix, "$src2, $src1", "$src1, $src2",
10093 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10095 (MaskOpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
10096 (i32 timm:$src2))>,
10097 Sched<[sched.Folded, sched.ReadAfterFold]>;
10098 defm rmbi : AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
10099 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
10100 OpcodeStr#_.Suffix, "$src2, ${src1}"#_.BroadcastStr,
10101 "${src1}"#_.BroadcastStr#", $src2",
10102 (OpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10104 (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src1)),
10105 (i32 timm:$src2))>, EVEX_B,
10106 Sched<[sched.Folded, sched.ReadAfterFold]>;
10110 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// {sae} variant: EVEX_B on a register form means suppress-all-exceptions;
// still reads MXCSR but cannot raise exceptions.
10111 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10112 SDNode OpNode, X86FoldableSchedWrite sched,
10113 X86VectorVTInfo _> {
10114 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10115 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10116 (ins _.RC:$src1, i32u8imm:$src2),
10117 OpcodeStr#_.Suffix, "$src2, {sae}, $src1",
10118 "$src1, {sae}, $src2",
10119 (OpNode (_.VT _.RC:$src1),
10120 (i32 timm:$src2))>,
10121 EVEX_B, Sched<[sched]>;
// Wrapper: 512-bit gets both the normal and SAE forms; 128/256-bit (VLX)
// get only the normal form — SAE exists only at 512-bit.
10124 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
10125 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10126 SDNode MaskOpNode, SDNode OpNodeSAE, X86SchedWriteWidths sched,
10128 let Predicates = [prd] in {
10129 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10130 sched.ZMM, _.info512>,
10131 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE,
10132 sched.ZMM, _.info512>, EVEX_V512;
10134 let Predicates = [prd, HasVLX] in {
10135 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10136 sched.XMM, _.info128>, EVEX_V128;
10137 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, MaskOpNode,
10138 sched.YMM, _.info256>, EVEX_V256;
10142 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10143 // op(reg_vec2,mem_vec,imm)
10144 // op(reg_vec2,broadcast(eltVt),imm)
10145 //all instruction created with FROUND_CURRENT
// Two-source packed FP op with an 8-bit immediate: rri (reg,reg), rmi
// (reg,mem) and rmbi (reg,broadcast) forms; may raise FP exceptions.
10146 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10147 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10148 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10149 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10150 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10151 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10152 (OpNode (_.VT _.RC:$src1),
10154 (i32 timm:$src3))>,
10156 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10157 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
10158 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10159 (OpNode (_.VT _.RC:$src1),
10160 (_.VT (bitconvert (_.LdFrag addr:$src2))),
10161 (i32 timm:$src3))>,
10162 Sched<[sched.Folded, sched.ReadAfterFold]>;
10163 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10164 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
10165 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10166 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10167 (OpNode (_.VT _.RC:$src1),
10168 (_.VT (_.BroadcastLdFrag addr:$src2)),
10169 (i32 timm:$src3))>, EVEX_B,
10170 Sched<[sched.Folded, sched.ReadAfterFold]>;
10174 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10175 // op(reg_vec2,mem_vec,imm)
// Integer variant with separate source and destination type infos (used e.g.
// when the result element width differs from the source, as in vdbpsadbw).
10176 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10177 X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo,
10178 X86VectorVTInfo SrcInfo>{
10179 let ExeDomain = DestInfo.ExeDomain in {
10180 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
10181 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
10182 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10183 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10184 (SrcInfo.VT SrcInfo.RC:$src2),
10185 (i8 timm:$src3)))>,
10187 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
10188 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
10189 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10190 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
10191 (SrcInfo.VT (bitconvert
10192 (SrcInfo.LdFrag addr:$src2))),
10193 (i8 timm:$src3)))>,
10194 Sched<[sched.Folded, sched.ReadAfterFold]>;
10198 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10199 // op(reg_vec2,mem_vec,imm)
10200 // op(reg_vec2,broadcast(eltVt),imm)
// Same-type specialization of avx512_3Op_rm_imm8 that additionally adds the
// broadcast-memory (rmbi, EVEX_B) form.
10201 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
10202 X86FoldableSchedWrite sched, X86VectorVTInfo _>:
10203 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{
10205 let ExeDomain = _.ExeDomain in
10206 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10207 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10208 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10209 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10210 (OpNode (_.VT _.RC:$src1),
10211 (_.VT (_.BroadcastLdFrag addr:$src2)),
10212 (i8 timm:$src3))>, EVEX_B,
10213 Sched<[sched.Folded, sched.ReadAfterFold]>;
10216 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
10217 // op(reg_vec2,mem_scalar,imm)
// Scalar FP op with immediate: reg,reg and reg,scalar-mem forms; may raise
// FP exceptions and reads MXCSR.
10218 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10219 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10220 let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
10221 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10222 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10223 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10224 (OpNode (_.VT _.RC:$src1),
10226 (i32 timm:$src3))>,
10228 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10229 (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
10230 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10231 (OpNode (_.VT _.RC:$src1),
10232 (_.ScalarIntMemFrags addr:$src2),
10233 (i32 timm:$src3))>,
10234 Sched<[sched.Folded, sched.ReadAfterFold]>;
10238 //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Packed {sae} form: EVEX_B on a register operand suppresses exceptions.
10239 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
10240 SDNode OpNode, X86FoldableSchedWrite sched,
10241 X86VectorVTInfo _> {
10242 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10243 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10244 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10245 OpcodeStr, "$src3, {sae}, $src2, $src1",
10246 "$src1, $src2, {sae}, $src3",
10247 (OpNode (_.VT _.RC:$src1),
10249 (i32 timm:$src3))>,
10250 EVEX_B, Sched<[sched]>;
10253 //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar {sae} form.  NOTE(review): the explicit NAME# prefix on the defm
// (unlike the packed variant above) fixes the emitted instruction name —
// presumably for parity with the names the lowering code expects; confirm.
10254 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10255 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10256 let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
10257 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10258 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
10259 OpcodeStr, "$src3, {sae}, $src2, $src1",
10260 "$src1, $src2, {sae}, $src3",
10261 (OpNode (_.VT _.RC:$src1),
10263 (i32 timm:$src3))>,
10264 EVEX_B, Sched<[sched]>;
// Wrapper: packed FP imm ops.  512-bit gets normal + SAE forms; 128/256-bit
// (VLX) get only the normal form.
10267 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
10268 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode,
10269 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd>{
10270 let Predicates = [prd] in {
10271 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10272 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeSAE, sched.ZMM, _.info512>,
10276 let Predicates = [prd, HasVLX] in {
10277 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10279 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Wrapper: integer 3-operand imm ops with distinct source/dest type infos;
// defaults to HasBWI since the main user is vdbpsadbw/vpalignr-style ops.
10284 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
10285 X86SchedWriteWidths sched, AVX512VLVectorVTInfo DestInfo,
10286 AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
10287 let Predicates = [Pred] in {
10288 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.ZMM, DestInfo.info512,
10289 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
10291 let Predicates = [Pred, HasVLX] in {
10292 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.XMM, DestInfo.info128,
10293 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
10294 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched.YMM, DestInfo.info256,
10295 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Wrapper: same-type 3-operand imm ops (includes the broadcast form).
10299 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
10300 bits<8> opc, SDNode OpNode, X86SchedWriteWidths sched,
10301 Predicate Pred = HasAVX512> {
10302 let Predicates = [Pred] in {
10303 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>,
10306 let Predicates = [Pred, HasVLX] in {
10307 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.XMM, _.info128>,
10309 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched.YMM, _.info256>,
// Wrapper: scalar imm ops — scalar ops only exist at one width (XMM).
10314 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
10315 X86VectorVTInfo _, bits<8> opc, SDNode OpNode,
10316 SDNode OpNodeSAE, X86SchedWriteWidths sched, Predicate prd> {
10317 let Predicates = [prd] in {
10318 defm Z : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched.XMM, _>,
10319 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeSAE, sched.XMM, _>;
// Wrapper: instantiate the PS (f32, CD8VF scale 32) and PD (f64, scale 64,
// VEX_W) flavors of a unary packed imm op in one go.
10323 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
10324 bits<8> opcPs, bits<8> opcPd, SDNode OpNode,
10325 SDNode MaskOpNode, SDNode OpNodeSAE,
10326 X86SchedWriteWidths sched, Predicate prd>{
10327 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
10328 opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10329 EVEX_CD8<32, CD8VF>;
10330 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
10331 opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
10332 EVEX_CD8<64, CD8VF>, VEX_W;
// Instantiations: vreduce/vrndscale/vgetmant (packed PS/PD, all widths) and
// the vrange packed ops, followed by the scalar SD/SS forms of each.
// vrndscale is the only one with distinct PS/PD opcodes (0x08/0x09).
10335 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
10336 X86VReduce, X86VReduce, X86VReduceSAE,
10337 SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
10338 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
10339 X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
10340 SchedWriteFRnd, HasAVX512>,
10341 AVX512AIi8Base, EVEX;
10342 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
10343 X86VGetMant, X86VGetMant, X86VGetMantSAE,
10344 SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
10346 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
10347 0x50, X86VRange, X86VRangeSAE,
10348 SchedWriteFAdd, HasDQI>,
10349 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10350 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
10351 0x50, X86VRange, X86VRangeSAE,
10352 SchedWriteFAdd, HasDQI>,
10353 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10355 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
10356 f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10357 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10358 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
10359 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
10360 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10362 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
10363 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10364 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10365 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
10366 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
10367 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10369 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
10370 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10371 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10372 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
10373 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
10374 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// 128-bit-lane shuffles (vshuff32x4/vshuff64x2/vshufi32x4/vshufi64x2).
// The shuffle is matched at CastInfo's element granularity and re-typed to
// the instruction's own type; EVEX2VEXOvrd names the VEX instruction the
// 256-bit form may be compressed to (VPERM2F128/VPERM2I128).
10376 multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
10377 X86FoldableSchedWrite sched,
10379 X86VectorVTInfo CastInfo,
10380 string EVEX2VEXOvrd> {
10381 let ExeDomain = _.ExeDomain in {
10382 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10383 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10384 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10386 (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
10387 (i8 timm:$src3)))))>,
10388 Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
10389 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10390 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10391 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10394 (CastInfo.VT (X86Shuf128 _.RC:$src1,
10395 (CastInfo.LdFrag addr:$src2),
10396 (i8 timm:$src3)))))>,
10397 Sched<[sched.Folded, sched.ReadAfterFold]>,
10398 EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
10399 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10400 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10401 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10402 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10406 (X86Shuf128 _.RC:$src1,
10407 (_.BroadcastLdFrag addr:$src2),
10408 (i8 timm:$src3)))))>, EVEX_B,
10409 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate 512-bit (no EVEX2VEX override — there is no 512-bit VEX form)
// and, under VLX, the 256-bit variant with the override.  No 128-bit form:
// a single 128-bit lane has nothing to shuffle.
10413 multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched,
10414 AVX512VLVectorVTInfo _,
10415 AVX512VLVectorVTInfo CastInfo, bits<8> opc,
10416 string EVEX2VEXOvrd>{
10417 let Predicates = [HasAVX512] in
10418 defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10419 _.info512, CastInfo.info512, "">, EVEX_V512;
10421 let Predicates = [HasAVX512, HasVLX] in
10422 defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched,
10423 _.info256, CastInfo.info256,
10424 EVEX2VEXOvrd>, EVEX_V256;
10427 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
10428 avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10429 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
10430 avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10431 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
10432 avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
10433 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
10434 avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
10436 let Predicates = [HasAVX512] in {
10437 // Provide fallback in case the load node that is used in the broadcast
10438 // patterns above is used by additional users, which prevents the pattern
// Implement 128-bit subvector broadcast as a lane self-shuffle: insert the
// xmm into an undef zmm twice and shuffle lanes with VSHUFF/I.
10440 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
10441 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10442 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10444 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
10445 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10446 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10449 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
10450 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10451 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10453 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
10454 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10455 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// i16/i8 element types reuse the i32 lane shuffle: lane movement is
// element-type agnostic, so no BWI-specific instruction is needed.
10458 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
10459 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10460 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10463 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
10464 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
10465 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
// VALIGND/VALIGNQ: concatenate-and-shift-right by $src3 elements.  The
// EVEX2VEX overrides let the compression pass rewrite these as VPALIGNR
// when the immediate permits.
10469 multiclass avx512_valign<bits<8> opc, string OpcodeStr,
10470 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
10471 // NOTE: EVEX2VEXOverride changed back to Unset for 256-bit at the
10472 // instantiation of this class.
10473 let ExeDomain = _.ExeDomain in {
10474 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10475 (ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
10476 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10477 (_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
10478 Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
10479 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10480 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
10481 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
10482 (_.VT (X86VAlign _.RC:$src1,
10483 (bitconvert (_.LdFrag addr:$src2)),
10484 (i8 timm:$src3)))>,
10485 Sched<[sched.Folded, sched.ReadAfterFold]>,
10486 EVEX2VEXOverride<"VPALIGNRrmi">;
10488 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10489 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10490 OpcodeStr, "$src3, ${src2}"#_.BroadcastStr#", $src1",
10491 "$src1, ${src2}"#_.BroadcastStr#", $src3",
10492 (X86VAlign _.RC:$src1,
10493 (_.VT (_.BroadcastLdFrag addr:$src2)),
10494 (i8 timm:$src3))>, EVEX_B,
10495 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiate all widths; VPALIGNR is 128-bit-lane-local, so only the
// 128-bit variant keeps the override (see the comment below for Z256).
10499 multiclass avx512_valign_common<string OpcodeStr, X86SchedWriteWidths sched,
10500 AVX512VLVectorVTInfo _> {
10501 let Predicates = [HasAVX512] in {
10502 defm Z : avx512_valign<0x03, OpcodeStr, sched.ZMM, _.info512>,
10503 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10505 let Predicates = [HasAVX512, HasVLX] in {
10506 defm Z128 : avx512_valign<0x03, OpcodeStr, sched.XMM, _.info128>,
10507 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10508 // We can't really override the 256-bit version so change it back to unset.
10509 let EVEX2VEXOverride = ? in
10510 defm Z256 : avx512_valign<0x03, OpcodeStr, sched.YMM, _.info256>,
10511 AVX512AIi8Base, EVEX_4V, EVEX_V256;
10515 defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
10516 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
10517 defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
10518 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
10521 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
10522 SchedWriteShuffle, avx512vl_i8_info,
10523 avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
10525 // Fragments to help convert valignq into masked valignd. Or valignq/valignd
// Immediate rescaling: a valignq shift of N qwords equals 2*N dwords or
// 8*N bytes; a valignd shift of N dwords equals 4*N bytes.
10527 def ValignqImm32XForm : SDNodeXForm<timm, [{
10528 return getI8Imm(N->getZExtValue() * 2, SDLoc(N));
10530 def ValignqImm8XForm : SDNodeXForm<timm, [{
10531 return getI8Imm(N->getZExtValue() * 8, SDLoc(N));
10533 def ValigndImm8XForm : SDNodeXForm<timm, [{
10534 return getI8Imm(N->getZExtValue() * 4, SDLoc(N));
// Rewrite a masked select over a wider-element align (From) as the
// narrower-element instruction (To) with a rescaled immediate, so the
// writemask can apply at the narrower granularity.  Covers merge-masked
// and zero-masked, register and memory operand forms.
10537 multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
10538 X86VectorVTInfo From, X86VectorVTInfo To,
10539 SDNodeXForm ImmXForm> {
10540 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10542 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10545 (!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
10546 To.RC:$src1, To.RC:$src2,
10547 (ImmXForm timm:$src3))>;
10549 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10551 (From.VT (OpNode From.RC:$src1, From.RC:$src2,
10554 (!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
10555 To.RC:$src1, To.RC:$src2,
10556 (ImmXForm timm:$src3))>;
10558 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10560 (From.VT (OpNode From.RC:$src1,
10561 (From.LdFrag addr:$src2),
10564 (!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
10565 To.RC:$src1, addr:$src2,
10566 (ImmXForm timm:$src3))>;
10568 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10570 (From.VT (OpNode From.RC:$src1,
10571 (From.LdFrag addr:$src2),
10574 (!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
10575 To.RC:$src1, addr:$src2,
10576 (ImmXForm timm:$src3))>;
// Extension that also handles broadcast-memory (rmbi/rmbik/rmbikz) forms,
// where the broadcast is performed at the To (narrower) element width.
10579 multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
10580 X86VectorVTInfo From,
10581 X86VectorVTInfo To,
10582 SDNodeXForm ImmXForm> :
10583 avx512_vpalign_mask_lowering<OpcodeStr, OpNode, From, To, ImmXForm> {
10584 def : Pat<(From.VT (OpNode From.RC:$src1,
10585 (bitconvert (To.VT (To.BroadcastLdFrag addr:$src2))),
10587 (!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
10588 (ImmXForm timm:$src3))>;
10590 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10592 (From.VT (OpNode From.RC:$src1,
10594 (To.VT (To.BroadcastLdFrag addr:$src2))),
10597 (!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
10598 To.RC:$src1, addr:$src2,
10599 (ImmXForm timm:$src3))>;
10601 def : Pat<(To.VT (vselect_mask To.KRCWM:$mask,
10603 (From.VT (OpNode From.RC:$src1,
10605 (To.VT (To.BroadcastLdFrag addr:$src2))),
10608 (!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
10609 To.RC:$src1, addr:$src2,
10610 (ImmXForm timm:$src3))>;
10613 let Predicates = [HasAVX512] in {
10614 // For 512-bit we lower to the widest element type we can. So we only need
10615 // to handle converting valignq to valignd.
10616 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ", X86VAlign, v8i64_info,
10617 v16i32_info, ValignqImm32XForm>;
10620 let Predicates = [HasVLX] in {
10621 // For 128-bit we lower to the widest element type we can. So we only need
10622 // to handle converting valignq to valignd.
10623 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ128", X86VAlign, v2i64x_info,
10624 v4i32x_info, ValignqImm32XForm>;
10625 // For 256-bit we lower to the widest element type we can. So we only need
10626 // to handle converting valignq to valignd.
10627 defm : avx512_vpalign_mask_lowering_mb<"VALIGNDZ256", X86VAlign, v4i64x_info,
10628 v8i32x_info, ValignqImm32XForm>;
10631 let Predicates = [HasVLX, HasBWI] in {
10632 // We can turn 128 and 256 bit VALIGND/VALIGNQ into VPALIGNR.
// Byte-granularity rewrite via VPALIGNR (needs BWI); non-broadcast
// multiclass since VPALIGNR has no broadcast form.
10633 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v2i64x_info,
10634 v16i8x_info, ValignqImm8XForm>;
10635 defm : avx512_vpalign_mask_lowering<"VPALIGNRZ128", X86VAlign, v4i32x_info,
10636 v16i8x_info, ValigndImm8XForm>;
// vdbpsadbw: word results from byte sources, hence distinct dest/src infos;
// NotEVEX2VEXConvertible — no VEX equivalent exists.
10639 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
10640 SchedWritePSADBW, avx512vl_i16_info, avx512vl_i8_info>,
10641 EVEX_CD8<8, CD8VF>, NotEVEX2VEXConvertible;
// Maskable unary operation: register (rr) and full-vector memory (rm) forms.
10643 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
10644 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10645 let ExeDomain = _.ExeDomain in {
10646 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10647 (ins _.RC:$src1), OpcodeStr,
10649 (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase,
// Memory form: load the full vector, bitconvert to the operation's type.
10652 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10653 (ins _.MemOp:$src1), OpcodeStr,
10655 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>,
10656 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>,
10657 Sched<[sched.Folded]>;
// Adds the broadcast-from-scalar-memory form (rmb, EVEX.b set) on top of
// avx512_unary_rm.
10661 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
10662 X86FoldableSchedWrite sched, X86VectorVTInfo _> :
10663 avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> {
10664 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10665 (ins _.ScalarMemOp:$src1), OpcodeStr,
10666 "${src1}"#_.BroadcastStr,
10667 "${src1}"#_.BroadcastStr,
10668 (_.VT (OpNode (_.VT (_.BroadcastLdFrag addr:$src1))))>,
10669 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
10670 Sched<[sched.Folded]>;
// Instantiates avx512_unary_rm for all three vector lengths: 512-bit under
// prd, 128/256-bit additionally require HasVLX.
10673 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10674 X86SchedWriteWidths sched,
10675 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
10676 let Predicates = [prd] in
10677 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10680 let Predicates = [prd, HasVLX] in {
10681 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10683 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Same as avx512_unary_rm_vl, but using the broadcast-capable rmb multiclass.
10688 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
10689 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo,
10691 let Predicates = [prd] in
10692 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.ZMM, VTInfo.info512>,
10695 let Predicates = [prd, HasVLX] in {
10696 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.YMM, VTInfo.info256>,
10698 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched.XMM, VTInfo.info128>,
// Dword/qword element-size pair; only these sizes support embedded broadcast,
// hence the rmb variant. The qword form carries VEX_W.
10703 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
10704 SDNode OpNode, X86SchedWriteWidths sched,
10706 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
10707 avx512vl_i64_info, prd>, VEX_W;
10708 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
10709 avx512vl_i32_info, prd>;
// Byte/word element-size pair; no broadcast forms (rm only), W bit ignored.
10712 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
10713 SDNode OpNode, X86SchedWriteWidths sched,
10715 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
10716 avx512vl_i16_info, prd>, VEX_WIG;
10717 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
10718 avx512vl_i8_info, prd>, VEX_WIG;
// Convenience wrapper covering all four element sizes (b/w/d/q).
10721 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
10722 bits<8> opc_d, bits<8> opc_q,
10723 string OpcodeStr, SDNode OpNode,
10724 X86SchedWriteWidths sched> {
10725 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched,
10727 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched,
10731 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs,
10734 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
10735 let Predicates = [HasAVX512, NoVLX] in {
// Widen the operand into a zmm register (via INSERT_SUBREG on an undef
// v8i64) so the 512-bit instruction can be used when VLX is unavailable.
10736 def : Pat<(v4i64 (abs VR256X:$src)),
10739 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
10741 def : Pat<(v2i64 (abs VR128X:$src)),
10744 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
10748 // Use 512bit version to implement 128/256 bit.
// Under NoVLX, lower a 128/256-bit unary op by inserting the source into an
// undef 512-bit register, running the Z (zmm) instruction, and extracting the
// original subregister.
10749 multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode,
10750 AVX512VLVectorVTInfo _, Predicate prd> {
10751 let Predicates = [prd, NoVLX] in {
10752 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))),
10754 (!cast<Instruction>(InstrStr # "Zrr")
10755 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10756 _.info256.RC:$src1,
10757 _.info256.SubRegIdx)),
10758 _.info256.SubRegIdx)>;
10760 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))),
10762 (!cast<Instruction>(InstrStr # "Zrr")
10763 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
10764 _.info128.RC:$src1,
10765 _.info128.SubRegIdx)),
10766 _.info128.SubRegIdx)>;
// CDI instructions: leading-zero count and conflict detection (d/q only).
10770 defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz,
10771 SchedWriteVecIMul, HasCDI>;
10773 // FIXME: Is there a better scheduler class for VPCONFLICT?
10774 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict,
10775 SchedWriteVecALU, HasCDI>;
10777 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
10778 defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>;
10779 defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>;
10781 //===---------------------------------------------------------------------===//
10782 // Counts number of ones - VPOPCNTD and VPOPCNTQ
10783 //===---------------------------------------------------------------------===//
10785 // FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ?
10786 defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop,
10787 SchedWriteVecALU, HasVPOPCNTDQ>;
10789 defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>;
10790 defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
10792 //===---------------------------------------------------------------------===//
10793 // Replicate Single FP - MOVSHDUP and MOVSLDUP
10794 //===---------------------------------------------------------------------===//
// Shared wrapper for the f32 replicate shuffles; both use the XS prefix.
10796 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode,
10797 X86SchedWriteWidths sched> {
10798 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched,
10799 avx512vl_f32_info, HasAVX512>, XS;
10802 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup,
10803 SchedWriteFShuffle>;
10804 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup,
10805 SchedWriteFShuffle>;
10807 //===----------------------------------------------------------------------===//
10808 // AVX-512 - MOVDDUP
10809 //===----------------------------------------------------------------------===//
// 128-bit VMOVDDUP is special-cased: the register form is modeled as a
// broadcast of the low element, and the memory form loads a single scalar
// and broadcasts it (hence ScalarMemOp and CD8VH disp8 scaling).
10811 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
10812 X86FoldableSchedWrite sched, X86VectorVTInfo _> {
10813 let ExeDomain = _.ExeDomain in {
10814 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
10815 (ins _.RC:$src), OpcodeStr, "$src", "$src",
10816 (_.VT (X86VBroadcast (_.VT _.RC:$src)))>, EVEX,
10818 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
10819 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
10820 (_.VT (_.BroadcastLdFrag addr:$src))>,
10821 EVEX, EVEX_CD8<_.EltSize, CD8VH>,
10822 Sched<[sched.Folded]>;
// NOTE(review): the OpNode parameter is unused here; X86Movddup is hardwired
// in the Z/Z256 instantiations. The 128-bit variant uses the dedicated
// broadcast-style multiclass above.
10826 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
10827 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
10828 defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
10829 VTInfo.info512>, EVEX_V512;
10831 let Predicates = [HasAVX512, HasVLX] in {
10832 defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.YMM,
10833 VTInfo.info256>, EVEX_V256;
10834 defm Z128 : avx512_movddup_128<opc, OpcodeStr, sched.XMM,
10835 VTInfo.info128>, EVEX_V128;
// f64 instantiation wrapper (XD prefix, W=1) plus extra VLX patterns that
// match a broadcast of a scalar f64 held in an FR64X register.
10839 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
10840 X86SchedWriteWidths sched> {
10841 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
10842 avx512vl_f64_info>, XD, VEX_W;
10845 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
10847 let Predicates = [HasVLX] in {
// Unmasked, merge-masked, and zero-masked broadcasts of a scalar register:
// reinterpret the FR64X source as a VR128X and duplicate its low element.
10848 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
10849 (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10851 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10852 (v2f64 VR128X:$src0)),
10853 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
10854 (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10855 def : Pat<(vselect_mask (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
10857 (VMOVDDUPZ128rrkz VK2WM:$mask, (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
10860 //===----------------------------------------------------------------------===//
10861 // AVX-512 - Unpack Instructions
10862 //===----------------------------------------------------------------------===//
// FP unpacks are pure shuffles: clear MXCSR uses and the FP-exception flag
// that avx512_fp_binop_p would otherwise set.
10864 let Uses = []<Register>, mayRaiseFPException = 0 in {
10865 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, X86Unpckh, HasAVX512,
10866 SchedWriteFShuffleSizes, 0, 1>;
10867 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, X86Unpckl, HasAVX512,
10868 SchedWriteFShuffleSizes>;
// Integer unpacks: b/w forms need BWI, d/q forms only need AVX512F.
10871 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
10872 SchedWriteShuffle, HasBWI>;
10873 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
10874 SchedWriteShuffle, HasBWI>;
10875 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
10876 SchedWriteShuffle, HasBWI>;
10877 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
10878 SchedWriteShuffle, HasBWI>;
10880 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
10881 SchedWriteShuffle, HasAVX512>;
10882 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
10883 SchedWriteShuffle, HasAVX512>;
10884 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
10885 SchedWriteShuffle, HasAVX512>;
10886 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
10887 SchedWriteShuffle, HasAVX512>;
10889 //===----------------------------------------------------------------------===//
10890 // AVX-512 - Extract & Insert Integer Instructions
10891 //===----------------------------------------------------------------------===//
// Memory-destination extract for byte/word elements: extract the lane,
// truncate to the element width, and store it. disp8 scales by one element.
10893 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10894 X86VectorVTInfo _> {
10895 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
10896 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10897 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10898 [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))),
10900 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecExtractSt]>;
// VPEXTRB: register form (result zero-extended into a GR32/GR64) plus the
// shared memory-destination form. Requires BWI.
10903 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
10904 let Predicates = [HasBWI] in {
10905 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
10906 (ins _.RC:$src1, u8imm:$src2),
10907 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10908 [(set GR32orGR64:$dst,
10909 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
10910 EVEX, TAPD, Sched<[WriteVecExtract]>;
10912 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
// VPEXTRW: legacy 0xC5 register encoding for the pattern, plus the 0x15
// MRMDestReg encoding kept only for the disassembler (rr_REV), plus the
// shared memory-destination form.
10916 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
10917 let Predicates = [HasBWI] in {
10918 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
10919 (ins _.RC:$src1, u8imm:$src2),
10920 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10921 [(set GR32orGR64:$dst,
10922 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
10923 EVEX, PD, Sched<[WriteVecExtract]>;
// Disassembly-only alternative encoding; no patterns, folds to rr.
10925 let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
10926 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
10927 (ins _.RC:$src1, u8imm:$src2),
10928 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
10929 EVEX, TAPD, FoldGenData<NAME#rr>,
10930 Sched<[WriteVecExtract]>;
10932 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
// VPEXTRD/VPEXTRQ: plain extractelt to a GPR (register and store forms).
// Requires DQI.
10936 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
10937 RegisterClass GRC> {
10938 let Predicates = [HasDQI] in {
10939 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
10940 (ins _.RC:$src1, u8imm:$src2),
10941 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10943 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
10944 EVEX, TAPD, Sched<[WriteVecExtract]>;
10946 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
10947 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
10948 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
10949 [(store (extractelt (_.VT _.RC:$src1),
10950 imm:$src2),addr:$dst)]>,
10951 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD,
10952 Sched<[WriteVecExtractSt]>;
// EVEX-encoded element extracts; Q form differs from D only by VEX_W.
10956 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
10957 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
10958 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
10959 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
// Shared memory form for element inserts: load a scalar via LdFrag and
// insert it at the immediate-selected lane.
10961 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
10962 X86VectorVTInfo _, PatFrag LdFrag> {
10963 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
10964 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
10965 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10967 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
10968 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteVecInsert.Folded, WriteVecInsert.ReadAfterFold]>;
// VPINSRB/VPINSRW: insert from a GR32/GR64 (register form) or from memory.
// Requires BWI.
10971 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
10972 X86VectorVTInfo _, PatFrag LdFrag> {
10973 let Predicates = [HasBWI] in {
10974 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10975 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
10976 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10978 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V,
10979 Sched<[WriteVecInsert]>;
10981 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
// VPINSRD/VPINSRQ: plain insertelt from a GPR or from scalar memory.
// Requires DQI.
10985 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
10986 X86VectorVTInfo _, RegisterClass GRC> {
10987 let Predicates = [HasDQI] in {
10988 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
10989 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
10990 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
10992 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
10993 EVEX_4V, TAPD, Sched<[WriteVecInsert]>;
10995 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
10996 _.ScalarLdFrag>, TAPD;
// VPINSRD and VPINSRQ share opcode 0x22; they are distinguished solely by
// the VEX_W bit on the Q form.
11000 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
11001 extloadi8>, TAPD, VEX_WIG;
11002 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
11003 extloadi16>, PD, VEX_WIG;
11004 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
11005 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
11007 //===----------------------------------------------------------------------===//
11008 // VSHUFPS - VSHUFPD Operations
11009 //===----------------------------------------------------------------------===//
// VTInfo_I parameter carries the matching integer VT info; only VTInfo_FP is
// used for the instruction definitions (disp8 scaling from its element size).
11011 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
11012 AVX512VLVectorVTInfo VTInfo_FP>{
11013 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
11014 SchedWriteFShuffle>,
11015 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
11016 AVX512AIi8Base, EVEX_4V;
11019 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
11020 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
11022 //===----------------------------------------------------------------------===//
11023 // AVX-512 - Byte shift Left/Right
11024 //===----------------------------------------------------------------------===//
// Whole-register byte shift by immediate (ri) and from memory (mi). The
// opcode-extension forms (MRMr/MRMm) select left vs. right shift.
11026 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
11027 Format MRMm, string OpcodeStr,
11028 X86FoldableSchedWrite sched, X86VectorVTInfo _>{
11029 def ri : AVX512<opc, MRMr,
11030 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
11031 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11032 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
11034 def mi : AVX512<opc, MRMm,
11035 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
11036 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11037 [(set _.RC:$dst,(_.VT (OpNode
11038 (_.VT (bitconvert (_.LdFrag addr:$src1))),
11039 (i8 timm:$src2))))]>,
11040 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Vector-length expansion for the byte shifts: 512-bit under prd,
// 128/256-bit additionally require HasVLX.
11043 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
11044 Format MRMm, string OpcodeStr,
11045 X86SchedWriteWidths sched, Predicate prd>{
11046 let Predicates = [prd] in
11047 defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11048 sched.ZMM, v64i8_info>, EVEX_V512;
11049 let Predicates = [prd, HasVLX] in {
11050 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11051 sched.YMM, v32i8x_info>, EVEX_V256;
11052 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, OpcodeStr,
11053 sched.XMM, v16i8x_info>, EVEX_V128;
// Both byte shifts share opcode 0x73; the ModRM reg field (/7 vs /3)
// distinguishes shift-left from shift-right.
11056 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
11057 SchedWriteShuffle, HasBWI>,
11058 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
11059 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
11060 SchedWriteShuffle, HasBWI>,
11061 AVX512PDIi8Base, EVEX_4V, VEX_WIG;
// PSADBW-style operation: i8 source vectors produce an i64-lane destination,
// so source and destination use distinct VT infos. Register form commutes.
11063 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
11064 string OpcodeStr, X86FoldableSchedWrite sched,
11065 X86VectorVTInfo _dst, X86VectorVTInfo _src> {
11066 let isCommutable = 1 in
11067 def rr : AVX512BI<opc, MRMSrcReg,
11068 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
11069 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11070 [(set _dst.RC:$dst,(_dst.VT
11071 (OpNode (_src.VT _src.RC:$src1),
11072 (_src.VT _src.RC:$src2))))]>,
11074 def rm : AVX512BI<opc, MRMSrcMem,
11075 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
11076 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
11077 [(set _dst.RC:$dst,(_dst.VT
11078 (OpNode (_src.VT _src.RC:$src1),
11079 (_src.VT (bitconvert
11080 (_src.LdFrag addr:$src2))))))]>,
11081 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Vector-length expansion for PSADBW (i64 dst lanes, i8 src lanes at each
// width), then the single VPSADBW instantiation under BWI.
11084 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
11085 string OpcodeStr, X86SchedWriteWidths sched,
11087 let Predicates = [prd] in
11088 defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.ZMM,
11089 v8i64_info, v64i8_info>, EVEX_V512;
11090 let Predicates = [prd, HasVLX] in {
11091 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.YMM,
11092 v4i64x_info, v32i8x_info>, EVEX_V256;
11093 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched.XMM,
11094 v2i64x_info, v16i8x_info>, EVEX_V128;
11098 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
11099 SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
11101 // Transforms to swizzle an immediate to enable better matching when
11102 // memory operand isn't in the right place.
// The 8-bit ternlog immediate is a truth table indexed by op0*4 + op1*2 +
// op2; permuting operands permutes the table's bit positions accordingly.
11103 def VPTERNLOG321_imm8 : SDNodeXForm<timm, [{
11104 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
11105 uint8_t Imm = N->getZExtValue();
11106 // Swap bits 1/4 and 3/6.
11107 uint8_t NewImm = Imm & 0xa5;
11108 if (Imm & 0x02) NewImm |= 0x10;
11109 if (Imm & 0x10) NewImm |= 0x02;
11110 if (Imm & 0x08) NewImm |= 0x40;
11111 if (Imm & 0x40) NewImm |= 0x08;
11112 return getI8Imm(NewImm, SDLoc(N));
11114 def VPTERNLOG213_imm8 : SDNodeXForm<timm, [{
11115 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
11116 uint8_t Imm = N->getZExtValue();
11117 // Swap bits 2/4 and 3/5.
11118 uint8_t NewImm = Imm & 0xc3;
11119 if (Imm & 0x04) NewImm |= 0x10;
11120 if (Imm & 0x10) NewImm |= 0x04;
11121 if (Imm & 0x08) NewImm |= 0x20;
11122 if (Imm & 0x20) NewImm |= 0x08;
11123 return getI8Imm(NewImm, SDLoc(N));
11125 def VPTERNLOG132_imm8 : SDNodeXForm<timm, [{
11126 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
11127 uint8_t Imm = N->getZExtValue();
11128 // Swap bits 1/2 and 5/6.
11129 uint8_t NewImm = Imm & 0x99;
11130 if (Imm & 0x02) NewImm |= 0x04;
11131 if (Imm & 0x04) NewImm |= 0x02;
11132 if (Imm & 0x20) NewImm |= 0x40;
11133 if (Imm & 0x40) NewImm |= 0x20;
11134 return getI8Imm(NewImm, SDLoc(N));
11136 def VPTERNLOG231_imm8 : SDNodeXForm<timm, [{
11137 // Convert a VPTERNLOG immediate by moving operand 0 to the end
11138 // (new operand order: 1, 2, 0).
uint8_t Imm = N->getZExtValue();
11139 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
11140 uint8_t NewImm = Imm & 0x81;
11141 if (Imm & 0x02) NewImm |= 0x04;
11142 if (Imm & 0x04) NewImm |= 0x10;
11143 if (Imm & 0x08) NewImm |= 0x40;
11144 if (Imm & 0x10) NewImm |= 0x02;
11145 if (Imm & 0x20) NewImm |= 0x08;
11146 if (Imm & 0x40) NewImm |= 0x20;
11147 return getI8Imm(NewImm, SDLoc(N));
11149 def VPTERNLOG312_imm8 : SDNodeXForm<timm, [{
11150 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
// (new operand order: 2, 0, 1).
11151 uint8_t Imm = N->getZExtValue();
11152 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
11153 uint8_t NewImm = Imm & 0x81;
11154 if (Imm & 0x02) NewImm |= 0x10;
11155 if (Imm & 0x04) NewImm |= 0x02;
11156 if (Imm & 0x08) NewImm |= 0x20;
11157 if (Imm & 0x10) NewImm |= 0x04;
11158 if (Imm & 0x20) NewImm |= 0x40;
11159 if (Imm & 0x40) NewImm |= 0x08;
11160 return getI8Imm(NewImm, SDLoc(N));
// VPTERNLOG instruction definitions (rri/rmi/rmbi, each maskable with
// $src1 tied to $dst) followed by a large set of commuted patterns: since
// the SDNode's operands can appear in any order but the instruction fixes
// which operand may come from memory/broadcast and which carries the
// passthru, each alternative ordering is matched by swizzling the immediate
// with the VPTERNLOG*_imm8 transforms above.
11163 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
11164 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11166 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
11167 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11168 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
11169 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11170 (OpNode (_.VT _.RC:$src1),
11173 (i8 timm:$src4)), 1, 1>,
11174 AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
11175 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11176 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
11177 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
11178 (OpNode (_.VT _.RC:$src1),
11180 (_.VT (bitconvert (_.LdFrag addr:$src3))),
11181 (i8 timm:$src4)), 1, 0>,
11182 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11183 Sched<[sched.Folded, sched.ReadAfterFold]>;
11184 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11185 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
11186 OpcodeStr, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11187 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11188 (OpNode (_.VT _.RC:$src1),
11190 (_.VT (_.BroadcastLdFrag addr:$src3)),
11191 (i8 timm:$src4)), 1, 0>, EVEX_B,
11192 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
11193 Sched<[sched.Folded, sched.ReadAfterFold]>;
11194 }// Constraints = "$src1 = $dst"
11196 // Additional patterns for matching passthru operand in other positions.
11197 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11198 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11200 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11201 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11202 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11203 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
11205 (!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
11206 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11208 // Additional patterns for matching loads in other positions.
11209 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
11210 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11211 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11212 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11213 def : Pat<(_.VT (OpNode _.RC:$src1,
11214 (bitconvert (_.LdFrag addr:$src3)),
11215 _.RC:$src2, (i8 timm:$src4))),
11216 (!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
11217 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11219 // Additional patterns for matching zero masking with loads in other
11221 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11222 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11223 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11225 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11226 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11227 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11228 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11229 _.RC:$src2, (i8 timm:$src4)),
11231 (!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
11232 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11234 // Additional patterns for matching masked loads with different
11236 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11237 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
11238 _.RC:$src2, (i8 timm:$src4)),
11240 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11241 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11242 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11243 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11244 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11246 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11247 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11248 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11249 (OpNode _.RC:$src2, _.RC:$src1,
11250 (bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
11252 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11253 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11254 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11255 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
11256 _.RC:$src1, (i8 timm:$src4)),
11258 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11259 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11260 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11261 (OpNode (bitconvert (_.LdFrag addr:$src3)),
11262 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11264 (!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
11265 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
11267 // Additional patterns for matching broadcasts in other positions.
11268 def : Pat<(_.VT (OpNode (_.BroadcastLdFrag addr:$src3),
11269 _.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
11270 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11271 addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11272 def : Pat<(_.VT (OpNode _.RC:$src1,
11273 (_.BroadcastLdFrag addr:$src3),
11274 _.RC:$src2, (i8 timm:$src4))),
11275 (!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
11276 addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11278 // Additional patterns for matching zero masking with broadcasts in other
11280 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11281 (OpNode (_.BroadcastLdFrag addr:$src3),
11282 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11284 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11285 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11286 (VPTERNLOG321_imm8 timm:$src4))>;
11287 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11288 (OpNode _.RC:$src1,
11289 (_.BroadcastLdFrag addr:$src3),
11290 _.RC:$src2, (i8 timm:$src4)),
11292 (!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
11293 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
11294 (VPTERNLOG132_imm8 timm:$src4))>;
11296 // Additional patterns for matching masked broadcasts with different
11298 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11299 (OpNode _.RC:$src1, (_.BroadcastLdFrag addr:$src3),
11300 _.RC:$src2, (i8 timm:$src4)),
11302 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11303 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 timm:$src4))>;
11304 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11305 (OpNode (_.BroadcastLdFrag addr:$src3),
11306 _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
11308 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11309 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 timm:$src4))>;
11310 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11311 (OpNode _.RC:$src2, _.RC:$src1,
11312 (_.BroadcastLdFrag addr:$src3),
11313 (i8 timm:$src4)), _.RC:$src1)),
11314 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11315 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 timm:$src4))>;
11316 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11317 (OpNode _.RC:$src2,
11318 (_.BroadcastLdFrag addr:$src3),
11319 _.RC:$src1, (i8 timm:$src4)),
11321 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11322 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 timm:$src4))>;
11323 def : Pat<(_.VT (vselect_mask _.KRCWM:$mask,
11324 (OpNode (_.BroadcastLdFrag addr:$src3),
11325 _.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
11327 (!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
11328 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 timm:$src4))>;
// Vector-length expansion for VPTERNLOG; NAME is threaded through so the
// patterns inside avx512_ternlog can !cast the concrete instruction names.
11331 multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
11332 AVX512VLVectorVTInfo _> {
11333 let Predicates = [HasAVX512] in
11334 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.ZMM,
11335 _.info512, NAME>, EVEX_V512;
11336 let Predicates = [HasAVX512, HasVLX] in {
11337 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.XMM,
11338 _.info128, NAME>, EVEX_V128;
11339 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched.YMM,
11340 _.info256, NAME>, EVEX_V256;
// Both element widths share opcode 0x25; the Q form differs only by VEX_W.
11344 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
11345 avx512vl_i32_info>;
11346 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
11347 avx512vl_i64_info>, VEX_W;
11349 // Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
11350 let Predicates = [HasVLX] in {
// --- VPTERNLOG selection: v16i8, 128-bit ---------------------------------
// Match X86vpternlog on v16i8 and select the qword form VPTERNLOGQZ128*;
// the register and full-vector-load cases are element-size agnostic.
// When the pattern's load appears in the first or second operand position,
// the load is still placed in the instruction's $src3 slot and the control
// immediate is rewritten (VPTERNLOG321_imm8 for operand order 3,2,1;
// VPTERNLOG132_imm8 for order 1,3,2) so the truth table stays correct.
11351 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11353 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11355 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11356 (loadv16i8 addr:$src3), (i8 timm:$src4))),
11357 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11359 def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
11360 VR128X:$src1, (i8 timm:$src4))),
11361 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11362 (VPTERNLOG321_imm8 timm:$src4))>;
11363 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
11364 VR128X:$src2, (i8 timm:$src4))),
11365 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11366 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i8 with a 32-bit broadcast load: select the dword broadcast form.
11368 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11369 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11371 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11373 def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11374 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11375 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11376 (VPTERNLOG321_imm8 timm:$src4))>;
11377 def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
11378 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11379 VR128X:$src2, (i8 timm:$src4))),
11380 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11381 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i8 with a 64-bit broadcast load: select the qword broadcast form.
11383 def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
11384 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11386 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11388 def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11389 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11390 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11391 (VPTERNLOG321_imm8 timm:$src4))>;
11392 def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
11393 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11394 VR128X:$src2, (i8 timm:$src4))),
11395 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11396 (VPTERNLOG132_imm8 timm:$src4))>;
// --- VPTERNLOG selection: v8i16, 128-bit ---------------------------------
// Same scheme as the v16i8 patterns above: qword form for register and
// full-vector loads, dword/qword broadcast forms for 32/64-bit broadcast
// loads, with immediate remapping for commuted memory-operand positions.
11398 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
11400 (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
11402 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11403 (loadv8i16 addr:$src3), (i8 timm:$src4))),
11404 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11406 def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
11407 VR128X:$src1, (i8 timm:$src4))),
11408 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11409 (VPTERNLOG321_imm8 timm:$src4))>;
11410 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
11411 VR128X:$src2, (i8 timm:$src4))),
11412 (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
11413 (VPTERNLOG132_imm8 timm:$src4))>;
// v8i16 with a 32-bit broadcast load.
11415 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11416 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11418 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11420 def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11421 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11422 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11423 (VPTERNLOG321_imm8 timm:$src4))>;
11424 def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
11425 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11426 VR128X:$src2, (i8 timm:$src4))),
11427 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11428 (VPTERNLOG132_imm8 timm:$src4))>;
// v8i16 with a 64-bit broadcast load.
11430 def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
11431 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11433 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11435 def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11436 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11437 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11438 (VPTERNLOG321_imm8 timm:$src4))>;
11439 def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
11440 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11441 VR128X:$src2, (i8 timm:$src4))),
11442 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11443 (VPTERNLOG132_imm8 timm:$src4))>;
// --- VPTERNLOG selection: mixed-width broadcasts, 128-bit ----------------
// v4i32 nodes whose folded operand is a 64-bit broadcast select the qword
// broadcast form, and v2i64 nodes with a 32-bit broadcast select the dword
// broadcast form, again with immediate remapping for commuted positions.
11445 def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
11446 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11448 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11450 def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11451 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11452 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11453 (VPTERNLOG321_imm8 timm:$src4))>;
11454 def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
11455 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
11456 VR128X:$src2, (i8 timm:$src4))),
11457 (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11458 (VPTERNLOG132_imm8 timm:$src4))>;
// v2i64 with a 32-bit broadcast load.
11460 def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
11461 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11463 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11465 def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11466 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
11467 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11468 (VPTERNLOG321_imm8 timm:$src4))>;
11469 def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
11470 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
11471 VR128X:$src2, (i8 timm:$src4))),
11472 (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
11473 (VPTERNLOG132_imm8 timm:$src4))>;
// --- VPTERNLOG selection: 256-bit ----------------------------------------
// 256-bit counterparts of the 128-bit patterns above (VR256X operands,
// VPTERNLOG*Z256* instructions): v32i8/v16i16 use the qword form for
// register and full-vector loads, dword/qword broadcast forms for 32/64-bit
// broadcast loads, and v8i32/v4i64 cover the mixed-width broadcast cases.
// Commuted memory operands are remapped into $src3 via the same
// VPTERNLOG321_imm8 / VPTERNLOG132_imm8 immediate transforms.
11475 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11477 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11479 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11480 (loadv32i8 addr:$src3), (i8 timm:$src4))),
11481 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11483 def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
11484 VR256X:$src1, (i8 timm:$src4))),
11485 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11486 (VPTERNLOG321_imm8 timm:$src4))>;
11487 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
11488 VR256X:$src2, (i8 timm:$src4))),
11489 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11490 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i8 with 32-bit broadcast load.
11492 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11493 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11495 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11497 def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11498 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11499 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11500 (VPTERNLOG321_imm8 timm:$src4))>;
11501 def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
11502 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11503 VR256X:$src2, (i8 timm:$src4))),
11504 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11505 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i8 with 64-bit broadcast load.
11507 def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
11508 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11510 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11512 def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11513 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11514 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11515 (VPTERNLOG321_imm8 timm:$src4))>;
11516 def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
11517 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11518 VR256X:$src2, (i8 timm:$src4))),
11519 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11520 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i16: register, full-vector load, and commuted load forms.
11522 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
11524 (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
11526 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11527 (loadv16i16 addr:$src3), (i8 timm:$src4))),
11528 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11530 def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
11531 VR256X:$src1, (i8 timm:$src4))),
11532 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11533 (VPTERNLOG321_imm8 timm:$src4))>;
11534 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
11535 VR256X:$src2, (i8 timm:$src4))),
11536 (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
11537 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i16 with 32-bit broadcast load.
11539 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11540 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11542 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11544 def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11545 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11546 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11547 (VPTERNLOG321_imm8 timm:$src4))>;
11548 def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
11549 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11550 VR256X:$src2, (i8 timm:$src4))),
11551 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11552 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i16 with 64-bit broadcast load.
11554 def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
11555 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11557 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11559 def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11560 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11561 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11562 (VPTERNLOG321_imm8 timm:$src4))>;
11563 def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
11564 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11565 VR256X:$src2, (i8 timm:$src4))),
11566 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11567 (VPTERNLOG132_imm8 timm:$src4))>;
// v8i32 with 64-bit broadcast load (mixed width).
11569 def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
11570 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11572 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11574 def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11575 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11576 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11577 (VPTERNLOG321_imm8 timm:$src4))>;
11578 def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
11579 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
11580 VR256X:$src2, (i8 timm:$src4))),
11581 (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11582 (VPTERNLOG132_imm8 timm:$src4))>;
// v4i64 with 32-bit broadcast load (mixed width).
11584 def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
11585 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11587 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11589 def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11590 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
11591 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11592 (VPTERNLOG321_imm8 timm:$src4))>;
11593 def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
11594 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
11595 VR256X:$src2, (i8 timm:$src4))),
11596 (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
11597 (VPTERNLOG132_imm8 timm:$src4))>;
// --- VPTERNLOG selection: 512-bit (AVX512F only, no VLX needed) ----------
// 512-bit counterparts (VR512 operands, VPTERNLOG*Z* instructions) of the
// patterns above, following the same layout: qword form for register and
// full-vector loads of v64i8/v32i16, dword/qword broadcast forms for
// 32/64-bit broadcast loads, plus the mixed-width v16i32/v8i64 broadcast
// cases. Immediates are remapped for commuted memory-operand positions.
11600 let Predicates = [HasAVX512] in {
11601 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11603 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11605 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11606 (loadv64i8 addr:$src3), (i8 timm:$src4))),
11607 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11609 def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
11610 VR512:$src1, (i8 timm:$src4))),
11611 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11612 (VPTERNLOG321_imm8 timm:$src4))>;
11613 def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
11614 VR512:$src2, (i8 timm:$src4))),
11615 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11616 (VPTERNLOG132_imm8 timm:$src4))>;
// v64i8 with 32-bit broadcast load.
11618 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11619 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11621 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11623 def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11624 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11625 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11626 (VPTERNLOG321_imm8 timm:$src4))>;
11627 def : Pat<(v64i8 (X86vpternlog VR512:$src1,
11628 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11629 VR512:$src2, (i8 timm:$src4))),
11630 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11631 (VPTERNLOG132_imm8 timm:$src4))>;
// v64i8 with 64-bit broadcast load.
11633 def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
11634 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11636 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11638 def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11639 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11640 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11641 (VPTERNLOG321_imm8 timm:$src4))>;
11642 def : Pat<(v64i8 (X86vpternlog VR512:$src1,
11643 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11644 VR512:$src2, (i8 timm:$src4))),
11645 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11646 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i16: register, full-vector load, and commuted load forms.
11648 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
11650 (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
11652 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11653 (loadv32i16 addr:$src3), (i8 timm:$src4))),
11654 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11656 def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
11657 VR512:$src1, (i8 timm:$src4))),
11658 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11659 (VPTERNLOG321_imm8 timm:$src4))>;
11660 def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
11661 VR512:$src2, (i8 timm:$src4))),
11662 (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
11663 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i16 with 32-bit broadcast load.
11665 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11666 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11668 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11670 def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11671 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11672 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11673 (VPTERNLOG321_imm8 timm:$src4))>;
11674 def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11675 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11676 VR512:$src2, (i8 timm:$src4))),
11677 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11678 (VPTERNLOG132_imm8 timm:$src4))>;
// v32i16 with 64-bit broadcast load.
11680 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11681 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11683 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11685 def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11686 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11687 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11688 (VPTERNLOG321_imm8 timm:$src4))>;
11689 def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11690 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11691 VR512:$src2, (i8 timm:$src4))),
11692 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11693 (VPTERNLOG132_imm8 timm:$src4))>;
// NOTE(review): this v32i16/32-bit-broadcast group repeats the one at
// 11665-11678 above -- looks redundant; confirm against upstream history.
11695 def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
11696 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11698 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11700 def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11701 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11702 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11703 (VPTERNLOG321_imm8 timm:$src4))>;
11704 def : Pat<(v32i16 (X86vpternlog VR512:$src1,
11705 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11706 VR512:$src2, (i8 timm:$src4))),
11707 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11708 (VPTERNLOG132_imm8 timm:$src4))>;
// v16i32 with 64-bit broadcast load (mixed width).
11710 def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
11711 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11713 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11715 def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11716 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11717 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11718 (VPTERNLOG321_imm8 timm:$src4))>;
11719 def : Pat<(v16i32 (X86vpternlog VR512:$src1,
11720 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
11721 VR512:$src2, (i8 timm:$src4))),
11722 (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11723 (VPTERNLOG132_imm8 timm:$src4))>;
// v8i64 with 32-bit broadcast load (mixed width).
11725 def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
11726 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11728 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11730 def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11731 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
11732 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11733 (VPTERNLOG321_imm8 timm:$src4))>;
11734 def : Pat<(v8i64 (X86vpternlog VR512:$src1,
11735 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
11736 VR512:$src2, (i8 timm:$src4))),
11737 (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
11738 (VPTERNLOG132_imm8 timm:$src4))>;
11741 // Patterns to implement vnot using vpternlog instead of creating all ones
11742 // using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
11743 // so that the result is only dependent on src0. But we use the same source
11744 // for all operands to prevent a false dependency.
11745 // TODO: We should maybe have a more generalized algorithm for folding to
// 512-bit vnot: xor with all-ones becomes VPTERNLOGQ with imm 15 (~src0),
// using the same register for all three sources to avoid a false dependency
// (see the comment block above).
11747 let Predicates = [HasAVX512] in {
11748 def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
11749 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11750 def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
11751 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11752 def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
11753 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
11754 def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
11755 (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
// vnot for 128/256-bit vectors when VLX is unavailable: widen the operand
// into a 512-bit register with INSERT_SUBREG/IMPLICIT_DEF, apply the
// 512-bit VPTERNLOG (imm 15), and extract the original sub_xmm/sub_ymm
// subregister. All three sources again use the same value.
11758 let Predicates = [HasAVX512, NoVLX] in {
11759 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11762 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11763 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11764 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11765 (i8 15)), sub_xmm)>;
11766 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11769 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11770 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11771 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11772 (i8 15)), sub_xmm)>;
11773 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11776 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11777 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11778 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11779 (i8 15)), sub_xmm)>;
11780 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11783 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11784 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11785 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
11786 (i8 15)), sub_xmm)>;
// 256-bit cases: same widening trick via sub_ymm.
11788 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11791 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11792 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11793 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11794 (i8 15)), sub_ymm)>;
11795 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11798 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11799 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11800 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11801 (i8 15)), sub_ymm)>;
11802 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11805 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11806 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11807 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11808 (i8 15)), sub_ymm)>;
11809 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11812 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11813 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11814 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
11815 (i8 15)), sub_ymm)>;
// vnot for 128/256-bit vectors with VLX: use the native narrow VPTERNLOGQ
// encodings directly, imm 15 with all three sources tied to $src.
11818 let Predicates = [HasVLX] in {
11819 def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
11820 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11821 def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
11822 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11823 def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
11824 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11825 def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
11826 (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
11828 def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
11829 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11830 def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
11831 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11832 def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
11833 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11834 def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
11835 (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
11838 //===----------------------------------------------------------------------===//
11839 // AVX-512 - FixupImm
11840 //===----------------------------------------------------------------------===//
// Packed VFIXUPIMM: three maskable forms sharing a tied $src1 = $dst
// operand -- rri (reg/reg), rmi (reg/mem), and rmbi (reg/broadcast-mem,
// EVEX.B). TblVT describes the integer table operand's type, which may
// differ from the main vector type _. All forms read MXCSR and may raise
// FP exceptions.
11842 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
11843 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11844 X86VectorVTInfo TblVT>{
11845 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
11846 Uses = [MXCSR], mayRaiseFPException = 1 in {
11847 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11848 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11849 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11850 (X86VFixupimm (_.VT _.RC:$src1),
11852 (TblVT.VT _.RC:$src3),
11853 (i32 timm:$src4))>, Sched<[sched]>;
// Memory form: the table operand is a full-vector load.
11854 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11855 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
11856 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11857 (X86VFixupimm (_.VT _.RC:$src1),
11859 (TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
11860 (i32 timm:$src4))>,
11861 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Broadcast-memory form: scalar table element splatted (EVEX.B).
11862 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
11863 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11864 OpcodeStr#_.Suffix, "$src4, ${src3}"#_.BroadcastStr#", $src2",
11865 "$src2, ${src3}"#_.BroadcastStr#", $src4",
11866 (X86VFixupimm (_.VT _.RC:$src1),
11868 (TblVT.VT (TblVT.BroadcastLdFrag addr:$src3)),
11869 (i32 timm:$src4))>,
11870 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
11871 } // Constraints = "$src1 = $dst"
// Packed VFIXUPIMM with an additional {sae} (suppress-all-exceptions)
// register form. Inherits all forms from avx512_fixupimm_packed and adds
// rrib, which selects X86VFixupimmSAE and sets EVEX.B to encode {sae}.
11874 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
11875 X86FoldableSchedWrite sched,
11876 X86VectorVTInfo _, X86VectorVTInfo TblVT>
11877 : avx512_fixupimm_packed<opc, OpcodeStr, sched, _, TblVT> {
11878 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
11879 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
11880 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11881 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11882 "$src2, $src3, {sae}, $src4",
11883 (X86VFixupimmSAE (_.VT _.RC:$src1),
11885 (TblVT.VT _.RC:$src3),
11886 (i32 timm:$src4))>,
11887 EVEX_B, Sched<[sched]>;
// Scalar VFIXUPIMM (SS/SD): rri (reg), rrib ({sae}, EVEX.B, MXCSR only),
// and rmi (scalar memory operand built with scalar_to_vector). $src1 is
// tied to $dst; _src3VT describes the integer table operand's type.
11891 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
11892 X86FoldableSchedWrite sched, X86VectorVTInfo _,
11893 X86VectorVTInfo _src3VT> {
11894 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
11895 ExeDomain = _.ExeDomain in {
11896 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11897 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11898 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11899 (X86VFixupimms (_.VT _.RC:$src1),
11901 (_src3VT.VT _src3VT.RC:$src3),
11902 (i32 timm:$src4))>, Sched<[sched]>, SIMD_EXC;
// {sae} form: suppresses exceptions, still reads MXCSR.
11903 let Uses = [MXCSR] in
11904 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
11905 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
11906 OpcodeStr#_.Suffix, "$src4, {sae}, $src3, $src2",
11907 "$src2, $src3, {sae}, $src4",
11908 (X86VFixupimmSAEs (_.VT _.RC:$src1),
11910 (_src3VT.VT _src3VT.RC:$src3),
11911 (i32 timm:$src4))>,
11912 EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Memory form: loads the scalar table element and vectorizes it.
11913 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
11914 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
11915 OpcodeStr#_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
11916 (X86VFixupimms (_.VT _.RC:$src1),
11918 (_src3VT.VT (scalar_to_vector
11919 (_src3VT.ScalarLdFrag addr:$src3))),
11920 (i32 timm:$src4))>,
11921 Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
// Instantiate packed VFIXUPIMM across vector widths: the 512-bit form
// (with {sae}) requires only AVX512F; the 128/256-bit forms additionally
// require VLX.
11925 multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
11926 AVX512VLVectorVTInfo _Vec,
11927 AVX512VLVectorVTInfo _Tbl> {
11928 let Predicates = [HasAVX512] in
11929 defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
11930 _Vec.info512, _Tbl.info512>, AVX512AIi8Base,
11931 EVEX_4V, EVEX_V512;
11932 let Predicates = [HasAVX512, HasVLX] in {
11933 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.XMM,
11934 _Vec.info128, _Tbl.info128>, AVX512AIi8Base,
11935 EVEX_4V, EVEX_V128;
11936 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", sched.YMM,
11937 _Vec.info256, _Tbl.info256>, AVX512AIi8Base,
11938 EVEX_4V, EVEX_V256;
// Concrete VFIXUPIMM instantiations: scalar SS/SD (opcode 0x55) and packed
// PS/PD (opcode 0x54 via avx512_fixupimm_packed_all). The table operand is
// an integer vector of matching element width; PD/SD forms add VEX.W.
11942 defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11943 SchedWriteFAdd.Scl, f32x_info, v4i32x_info>,
11944 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
11945 defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
11946 SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
11947 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
11948 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
11949 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
11950 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
11951 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
11953 // Patterns used to select SSE scalar fp arithmetic instructions from
11956 // (1) a scalar fp operation followed by a blend
11958 // The effect is that the backend no longer emits unnecessary vector
11959 // insert instructions immediately after SSE scalar fp instructions
11960 // like addss or mulss.
11962 // For example, given the following code:
11963 // __m128 foo(__m128 A, __m128 B) {
11968 // Previously we generated:
11969 // addss %xmm0, %xmm1
11970 // movss %xmm1, %xmm0
11972 // We now generate:
11973 // addss %xmm1, %xmm0
11975 // (2) a vector packed single/double fp operation followed by a vector insert
11977 // The effect is that the backend converts the packed fp instruction
11978 // followed by a vector insert into a single SSE scalar fp instruction.
11980 // For example, given the following code:
11981 // __m128 foo(__m128 A, __m128 B) {
11982 // __m128 C = A + B;
11983 //     return (__m128) {C[0], A[1], A[2], A[3]};
11986 // Previously we generated:
11987 // addps %xmm0, %xmm1
11988 // movss %xmm1, %xmm0
11990 // We now generate:
11991 // addss %xmm1, %xmm0
11993 // TODO: Some canonicalization in lowering would simplify the number of
11994 // patterns we have to try to match.
// Select AVX-512 scalar FP math instructions (via their _Int forms) for the
// "extract element 0, do scalar Op, reinsert via MoveNode" DAG shape, in
// plain, register-masked (Zrr_Intk/Zrm_Intk) and zero-masked
// (Zrr_Intkz/Zrm_Intkz) variants, each with a folded-load counterpart.
11995 multiclass AVX512_scalar_math_fp_patterns<SDNode Op, SDNode MaskedOp,
11996 string OpcPrefix, SDNode MoveNode,
11997 X86VectorVTInfo _, PatLeaf ZeroFP> {
11998 let Predicates = [HasAVX512] in {
11999 // extracted scalar math op with insert via movss
12000 def : Pat<(MoveNode
12001 (_.VT VR128X:$dst),
12002 (_.VT (scalar_to_vector
12003 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12005 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Int") _.VT:$dst,
12006 (_.VT (COPY_TO_REGCLASS _.FRC:$src, VR128X)))>;
12007 def : Pat<(MoveNode
12008 (_.VT VR128X:$dst),
12009 (_.VT (scalar_to_vector
12010 (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))),
12011 (_.ScalarLdFrag addr:$src))))),
12012 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Int") _.VT:$dst, addr:$src)>;
12014 // extracted masked scalar math op with insert via movss
12015 def : Pat<(MoveNode (_.VT VR128X:$src1),
12017 (X86selects_mask VK1WM:$mask,
12019 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12022 (!cast<Instruction>("V"#OpcPrefix#"Zrr_Intk")
12023 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12024 VK1WM:$mask, _.VT:$src1,
12025 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12026 def : Pat<(MoveNode (_.VT VR128X:$src1),
12028 (X86selects_mask VK1WM:$mask,
12030 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12031 (_.ScalarLdFrag addr:$src2)),
12033 (!cast<Instruction>("V"#OpcPrefix#"Zrm_Intk")
12034 (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)),
12035 VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Zero-masked variants: the masked-off lane becomes ZeroFP.
12037 // extracted masked scalar math op with insert via movss
12038 def : Pat<(MoveNode (_.VT VR128X:$src1),
12040 (X86selects_mask VK1WM:$mask,
12042 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12043 _.FRC:$src2), (_.EltVT ZeroFP)))),
12044 (!cast<I>("V"#OpcPrefix#"Zrr_Intkz")
12045 VK1WM:$mask, _.VT:$src1,
12046 (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
12047 def : Pat<(MoveNode (_.VT VR128X:$src1),
12049 (X86selects_mask VK1WM:$mask,
12051 (extractelt (_.VT VR128X:$src1), (iPTR 0))),
12052 (_.ScalarLdFrag addr:$src2)), (_.EltVT ZeroFP)))),
12053 (!cast<I>("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>;
// Instantiate the scalar-math pattern set for the four basic FP ops, in
// both SS (f32) and SD (f64) flavors. The strict (any_*) node matches the
// unmasked ops; the plain node is used for the masked forms.
12057 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSS", X86Movss, v4f32x_info, fp32imm0>;
12058 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSS", X86Movss, v4f32x_info, fp32imm0>;
12059 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSS", X86Movss, v4f32x_info, fp32imm0>;
12060 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSS", X86Movss, v4f32x_info, fp32imm0>;
12062 defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSD", X86Movsd, v2f64x_info, fp64imm0>;
12063 defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_info, fp64imm0>;
12064 defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
12065 defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
// Same idea for unary scalar FP ops: match "extract lane 0, apply OpNode,
// reinsert via Move" and select the instruction's Zr_Int form.
// Instantiated below for sqrt (SS/SD).
12067 multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
12068 SDNode Move, X86VectorVTInfo _> {
12069 let Predicates = [HasAVX512] in {
12070 def : Pat<(_.VT (Move _.VT:$dst,
12071 (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
12072 (!cast<Instruction>("V"#OpcPrefix#"Zr_Int") _.VT:$dst, _.VT:$src)>;
12076 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
12077 defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
12079 //===----------------------------------------------------------------------===//
12080 // AES instructions
12081 //===----------------------------------------------------------------------===//
// EVEX-encoded VAES instructions, built from the shared AESI_binop_rm_int
// multiclass. 128/256-bit forms require VLX+VAES; the 512-bit form needs
// AVX512F+VAES. IntPrefix names the base intrinsic; the 256/512-bit
// variants append "_256"/"_512".
12083 multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
12084 let Predicates = [HasVLX, HasVAES] in {
12085 defm Z128 : AESI_binop_rm_int<Op, OpStr,
12086 !cast<Intrinsic>(IntPrefix),
12087 loadv2i64, 0, VR128X, i128mem>,
12088 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
12089 defm Z256 : AESI_binop_rm_int<Op, OpStr,
12090 !cast<Intrinsic>(IntPrefix#"_256"),
12091 loadv4i64, 0, VR256X, i256mem>,
12092 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
12094 let Predicates = [HasAVX512, HasVAES] in
12095 defm Z : AESI_binop_rm_int<Op, OpStr,
12096 !cast<Intrinsic>(IntPrefix#"_512"),
12097 loadv8i64, 0, VR512, i512mem>,
12098 EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
// The four VAES round operations share opcodes 0xDC-0xDF.
12101 defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
12102 defm VAESENCLAST : avx512_vaes<0xDD, "vaesenclast", "int_x86_aesni_aesenclast">;
12103 defm VAESDEC : avx512_vaes<0xDE, "vaesdec", "int_x86_aesni_aesdec">;
12104 defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">;
12106 //===----------------------------------------------------------------------===//
12107 // PCLMUL instructions - Carry less multiplication
12108 //===----------------------------------------------------------------------===//
// EVEX-encoded VPCLMULQDQ: 512-bit form requires AVX512F+VPCLMULQDQ,
// narrow forms require VLX+VPCLMULQDQ. Assembler aliases (pclmul*qdq
// mnemonics) are added for all three widths.
12110 let Predicates = [HasAVX512, HasVPCLMULQDQ] in
12111 defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
12112 EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
12114 let Predicates = [HasVLX, HasVPCLMULQDQ] in {
12115 defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
12116 EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
12118 defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
12119 int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
12120 EVEX_CD8<64, CD8VF>, VEX_WIG;
12124 defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
12125 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
12126 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
12128 //===----------------------------------------------------------------------===//
12130 //===----------------------------------------------------------------------===//
// Variable-amount concat-shift (VPSHLDV/VPSHRDV): register (r) and memory (m)
// forms with masking via AVX512_maskable_3src; $src1 is tied to $dst.
// NOTE(review): content numbering skips 12146 and 12148-12150 (likely an
// AVX512FMA3Base tail on the memory form plus closing braces, lost in
// extraction) -- confirm against upstream.
12132 multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
12133 X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12134 let Constraints = "$src1 = $dst",
12135 ExeDomain = VTI.ExeDomain in {
12136 defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12137 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12138 "$src3, $src2", "$src2, $src3",
12139 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
12140 AVX512FMA3Base, Sched<[sched]>;
12141 defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12142 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12143 "$src3, $src2", "$src2, $src3",
12144 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12145 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12147 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Extends VBMI2_shift_var_rm with a broadcast-memory (mb) form for the
// dword/qword element widths (EVEX.b embedded broadcast).
12151 multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12152 X86FoldableSchedWrite sched, X86VectorVTInfo VTI>
12153 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> {
12154 let Constraints = "$src1 = $dst",
12155 ExeDomain = VTI.ExeDomain in
12156 defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12157 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
12158 "${src3}"#VTI.BroadcastStr#", $src2",
12159 "$src2, ${src3}"#VTI.BroadcastStr,
12160 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12161 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12162 AVX512FMA3Base, EVEX_B,
12163 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Instantiates VBMI2_shift_var_rm across the three vector widths: ZMM under
// HasVBMI2, YMM/XMM additionally under HasVLX.
// NOTE(review): content numbering skips 12170, 12173, 12175-12178 (likely
// the EVEX_V512/EVEX_V256/EVEX_V128 continuation lines and closing braces,
// lost in extraction) -- confirm against upstream.
12166 multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
12167 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12168 let Predicates = [HasVBMI2] in
12169 defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12171 let Predicates = [HasVBMI2, HasVLX] in {
12172 defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12174 defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Same width expansion as VBMI2_shift_var_rm_common but for the rmb variant
// (adds the embedded-broadcast memory form).
// NOTE(review): content numbering skips 12183, 12186, 12188-12190 (likely
// EVEX_V* continuation lines and closing braces, lost in extraction).
12179 multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
12180 X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12181 let Predicates = [HasVBMI2] in
12182 defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.ZMM, VTI.info512>,
12184 let Predicates = [HasVBMI2, HasVLX] in {
12185 defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.YMM, VTI.info256>,
12187 defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched.XMM, VTI.info128>,
// Top-level VBMI2 variable-shift expansion over element widths: word (wOp,
// no broadcast form) and dword/qword (dqOp, with broadcast form).
12191 multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
12192 SDNode OpNode, X86SchedWriteWidths sched> {
12193 defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
12194 avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
12195 defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
12196 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
12197 defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
12198 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// Immediate-count VBMI2 concat shifts (VPSHLD/VPSHRD) over w/d/q element
// widths, built on the common 3-operand imm8 helpers.
12201 multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
12202 SDNode OpNode, X86SchedWriteWidths sched> {
12203 defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
12204 avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
12205 VEX_W, EVEX_CD8<16, CD8VF>;
12206 defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
12207 OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
12208 defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
12209 sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
// VBMI2 instruction instantiations: variable and immediate concat shifts,
// plus byte/word compress and expand.
12213 defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SchedWriteVecIMul>;
12214 defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SchedWriteVecIMul>;
12215 defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SchedWriteVecIMul>;
12216 defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul>;
// NOTE(review): content numbering skips 12221 and 12224-12225 below (likely
// EVEX_CD8 continuation lines, lost in extraction) -- confirm upstream.
12219 defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
12220 avx512vl_i8_info, HasVBMI2>, EVEX,
12222 defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
12223 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
12226 defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
12227 avx512vl_i8_info, HasVBMI2>, EVEX;
12228 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
12229 avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
12231 //===----------------------------------------------------------------------===//
12232 // VNNI
12233 //===----------------------------------------------------------------------===//
// VNNI dot-product-accumulate forms: register (r), memory (m), and embedded
// broadcast (mb). $src1 is the accumulator, tied to $dst; commutativity of
// the two multiplicand operands is controlled per-instruction.
12235 let Constraints = "$src1 = $dst" in
12236 multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
12237 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12238 bit IsCommutable> {
12239 let ExeDomain = VTI.ExeDomain in {
12240 defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12241 (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
12242 "$src3, $src2", "$src2, $src3",
12243 (VTI.VT (OpNode VTI.RC:$src1,
12244 VTI.RC:$src2, VTI.RC:$src3)),
12245 IsCommutable, IsCommutable>,
12246 EVEX_4V, T8PD, Sched<[sched]>;
12247 defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12248 (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
12249 "$src3, $src2", "$src2, $src3",
12250 (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
12251 (VTI.VT (VTI.LdFrag addr:$src3))))>,
12252 EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
12253 Sched<[sched.Folded, sched.ReadAfterFold]>;
12254 defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12255 (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
12256 OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
12257 "$src2, ${src3}"#VTI.BroadcastStr,
12258 (OpNode VTI.RC:$src1, VTI.RC:$src2,
12259 (VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
12260 EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
12261 T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Expands VNNI_rmb over the three i32 vector widths: ZMM under HasVNNI,
// YMM/XMM additionally under HasVLX.
12265 multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode,
12266 X86SchedWriteWidths sched, bit IsCommutable> {
12267 let Predicates = [HasVNNI] in
12268 defm Z : VNNI_rmb<Op, OpStr, OpNode, sched.ZMM, v16i32_info,
12269 IsCommutable>, EVEX_V512;
12270 let Predicates = [HasVNNI, HasVLX] in {
12271 defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched.YMM, v8i32x_info,
12272 IsCommutable>, EVEX_V256;
12273 defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched.XMM, v4i32x_info,
12274 IsCommutable>, EVEX_V128;
// VNNI instantiations. The byte variants (VPDPBUSD*) mix signed/unsigned
// operands and are marked non-commutable; the word variants are commutable.
12278 // FIXME: Is there a better scheduler class for VPDP?
12279 defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SchedWriteVecIMul, 0>;
12280 defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SchedWriteVecIMul, 0>;
12281 defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SchedWriteVecIMul, 1>;
12282 defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul, 1>;
// Single-use guard: only fold vpmaddwd into VPDPWSSD when the intermediate
// multiply-add has exactly one use (otherwise it must remain materialized).
// NOTE(review): the closing "}]>;" line (content 12287) appears lost in
// extraction.
12284 def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
12285 (X86vpmaddwd node:$lhs, node:$rhs), [{
12286 return N->hasOneUse();
// Fold add(acc, vpmaddwd(a, b)) into a single VPDPWSSD at each vector width;
// memory-operand variants fold the load of the second multiplicand.
12289 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
12290 let Predicates = [HasVNNI] in {
12291 def : Pat<(v16i32 (add VR512:$src1,
12292 (X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12293 (VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12294 def : Pat<(v16i32 (add VR512:$src1,
12295 (X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12296 (VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12298 let Predicates = [HasVNNI,HasVLX] in {
12299 def : Pat<(v8i32 (add VR256X:$src1,
12300 (X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12301 (VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12302 def : Pat<(v8i32 (add VR256X:$src1,
12303 (X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12304 (VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12305 def : Pat<(v4i32 (add VR128X:$src1,
12306 (X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12307 (VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12308 def : Pat<(v4i32 (add VR128X:$src1,
12309 (X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12310 (VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12313 //===----------------------------------------------------------------------===//
12314 // Bit Algorithms
12315 //===----------------------------------------------------------------------===//
// BITALG byte/word population count, plus the generic unary lowering
// patterns that select them for ctpop on sub-dword element types.
12317 // FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW?
12318 defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
12319 avx512vl_i8_info, HasBITALG>;
12320 defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
12321 avx512vl_i16_info, HasBITALG>, VEX_W;
12323 defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
12324 defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
// Single-use variant of the vpshufbitqmb node, used for the masked pattern so
// the unmasked result is not duplicated when it has other users.
// NOTE(review): the closing "}]>;" line (content 12329) appears lost in
// extraction.
12326 def X86Vpshufbitqmb_su : PatFrag<(ops node:$src1, node:$src2),
12327 (X86Vpshufbitqmb node:$src1, node:$src2), [{
12328 return N->hasOneUse();
// vpshufbitqmb: bit-gather into a mask register (KRC destination), register
// and memory forms via the compare-style maskable helper.
// NOTE(review): content numbering skips 12334, 12340, 12343, 12351-12352
// (likely the "vpshufbitqmb" mnemonic operands, a Sched<[sched]> tail, and
// closing braces, lost in extraction) -- confirm against upstream.
12331 multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> {
12332 defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
12333 (ins VTI.RC:$src1, VTI.RC:$src2),
12335 "$src2, $src1", "$src1, $src2",
12336 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12337 (VTI.VT VTI.RC:$src2)),
12338 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12339 (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD,
12341 defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
12342 (ins VTI.RC:$src1, VTI.MemOp:$src2),
12344 "$src2, $src1", "$src1, $src2",
12345 (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
12346 (VTI.VT (VTI.LdFrag addr:$src2))),
12347 (X86Vpshufbitqmb_su (VTI.VT VTI.RC:$src1),
12348 (VTI.VT (VTI.LdFrag addr:$src2)))>,
12349 EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD,
12350 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width expansion for vpshufbitqmb (ZMM under BITALG; YMM/XMM need VLX),
// followed by the single instantiation.
12353 multiclass VPSHUFBITQMB_common<X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTI> {
12354 let Predicates = [HasBITALG] in
12355 defm Z : VPSHUFBITQMB_rm<sched.ZMM, VTI.info512>, EVEX_V512;
12356 let Predicates = [HasBITALG, HasVLX] in {
12357 defm Z256 : VPSHUFBITQMB_rm<sched.YMM, VTI.info256>, EVEX_V256;
12358 defm Z128 : VPSHUFBITQMB_rm<sched.XMM, VTI.info128>, EVEX_V128;
12362 // FIXME: Is there a better scheduler class for VPSHUFBITQMB?
12363 defm VPSHUFBITQMB : VPSHUFBITQMB_common<SchedWriteVecIMul, avx512vl_i8_info>;
12365 //===----------------------------------------------------------------------===//
12366 // GFNI
12367 //===----------------------------------------------------------------------===//
// EVEX-encoded GF(2^8) multiply: commutable binop over byte vectors, ZMM
// under GFNI+AVX512BW, YMM/XMM additionally under VLX.
// NOTE(review): content numbering skips 12373, 12376, 12378-12381, 12383
// (likely EVEX_V* tails, closing braces, and a SchedWriteVecALU argument,
// lost in extraction) -- confirm against upstream.
12369 multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12370 X86SchedWriteWidths sched> {
12371 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12372 defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, sched.ZMM, 1>,
12374 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12375 defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, sched.YMM, 1>,
12377 defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, sched.XMM, 1>,
12382 defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb,
12384 EVEX_CD8<8, CD8VF>, T8PD;
// Adds an embedded-broadcast (rmbi) form to the 3-op imm8 affine transform:
// the 64-bit matrix operand is broadcast (X86VBroadcastld64) and bitcast to
// the byte-vector type before the affine node.
12386 multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
12387 X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
12388 X86VectorVTInfo BcstVTI>
12389 : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> {
12390 let ExeDomain = VTI.ExeDomain in
12391 defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12392 (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
12393 OpStr, "$src3, ${src2}"#BcstVTI.BroadcastStr#", $src1",
12394 "$src1, ${src2}"#BcstVTI.BroadcastStr#", $src3",
12395 (OpNode (VTI.VT VTI.RC:$src1),
12396 (bitconvert (BcstVTI.VT (X86VBroadcastld64 addr:$src2))),
12397 (i8 timm:$src3))>, EVEX_B,
12398 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width expansion for the GFNI affine transforms (byte vectors with a
// broadcastable i64 matrix), plus the two instantiations.
12401 multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
12402 X86SchedWriteWidths sched> {
12403 let Predicates = [HasGFNI, HasAVX512, HasBWI] in
12404 defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.ZMM,
12405 v64i8_info, v8i64_info>, EVEX_V512;
12406 let Predicates = [HasGFNI, HasVLX, HasBWI] in {
12407 defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.YMM,
12408 v32i8x_info, v4i64x_info>, EVEX_V256;
12409 defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched.XMM,
12410 v16i8x_info, v2i64x_info>, EVEX_V128;
12414 defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
12415 X86GF2P8affineinvqb, SchedWriteVecIMul>,
12416 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12417 defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
12418 X86GF2P8affineqb, SchedWriteVecIMul>,
12419 EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
12422 //===----------------------------------------------------------------------===//
12423 // AVX5124FMAPS
12424 //===----------------------------------------------------------------------===//
// AVX5124FMAPS: asm-only definitions (empty patterns; selected via
// intrinsics elsewhere). All forms load a 4x32-bit memory block (f128mem)
// and accumulate into $dst, which is tied to $src1.
12426 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedSingle,
12427 Constraints = "$src1 = $dst", Uses = [MXCSR], mayRaiseFPException = 1 in {
12428 defm V4FMADDPSrm : AVX512_maskable_3src_in_asm<0x9A, MRMSrcMem, v16f32_info,
12429 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12430 "v4fmaddps", "$src3, $src2", "$src2, $src3",
12431 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12432 Sched<[SchedWriteFMA.ZMM.Folded]>;
12434 defm V4FNMADDPSrm : AVX512_maskable_3src_in_asm<0xAA, MRMSrcMem, v16f32_info,
12435 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12436 "v4fnmaddps", "$src3, $src2", "$src2, $src3",
12437 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12438 Sched<[SchedWriteFMA.ZMM.Folded]>;
12440 defm V4FMADDSSrm : AVX512_maskable_3src_in_asm<0x9B, MRMSrcMem, f32x_info,
12441 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12442 "v4fmaddss", "$src3, $src2", "$src2, $src3",
12443 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12444 Sched<[SchedWriteFMA.Scl.Folded]>;
12446 defm V4FNMADDSSrm : AVX512_maskable_3src_in_asm<0xAB, MRMSrcMem, f32x_info,
12447 (outs VR128X:$dst), (ins VR128X:$src2, f128mem:$src3),
12448 "v4fnmaddss", "$src3, $src2", "$src2, $src3",
12449 []>, VEX_LIG, EVEX_4V, T8XD, EVEX_CD8<32, CD8VF>,
12450 Sched<[SchedWriteFMA.Scl.Folded]>;
12453 //===----------------------------------------------------------------------===//
12454 // AVX5124VNNIW
12455 //===----------------------------------------------------------------------===//
// AVX5124VNNIW: asm-only quad-register dot-product definitions (empty
// patterns), integer domain, accumulator tied to $dst.
12457 let hasSideEffects = 0, mayLoad = 1, ExeDomain = SSEPackedInt,
12458 Constraints = "$src1 = $dst" in {
12459 defm VP4DPWSSDrm : AVX512_maskable_3src_in_asm<0x52, MRMSrcMem, v16i32_info,
12460 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12461 "vp4dpwssd", "$src3, $src2", "$src2, $src3",
12462 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12463 Sched<[SchedWriteFMA.ZMM.Folded]>;
12465 defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
12466 (outs VR512:$dst), (ins VR512:$src2, f128mem:$src3),
12467 "vp4dpwssds", "$src3, $src2", "$src2, $src3",
12468 []>, EVEX_V512, EVEX_4V, T8XD, EVEX_CD8<32, CD8VQ>,
12469 Sched<[SchedWriteFMA.ZMM.Folded]>;
// Pseudo spill/reload for a VK16 mask-register pair (used by VP2INTERSECT,
// whose results land in KRPC pairs); expanded post-RA.
12472 let hasSideEffects = 0 in {
12473 let mayStore = 1, SchedRW = [WriteFStoreX] in
12474 def MASKPAIR16STORE : PseudoI<(outs), (ins anymem:$dst, VK16PAIR:$src), []>;
12475 let mayLoad = 1, SchedRW = [WriteFLoadX] in
12476 def MASKPAIR16LOAD : PseudoI<(outs VK16PAIR:$dst), (ins anymem:$src), []>;
12479 //===----------------------------------------------------------------------===//
12480 // VP2INTERSECT
12481 //===----------------------------------------------------------------------===//
// vp2intersect: writes a PAIR of mask registers (KRPC destination) holding
// the pairwise-intersection masks of the two source vectors. Three forms:
// reg-reg (rr), full-vector memory (rm), and embedded broadcast (rmb).
12483 multiclass avx512_vp2intersect_modes<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
12484 def rr : I<0x68, MRMSrcReg,
12485 (outs _.KRPC:$dst),
12486 (ins _.RC:$src1, _.RC:$src2),
12487 !strconcat("vp2intersect", _.Suffix,
12488 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12489 [(set _.KRPC:$dst, (X86vp2intersect
12490 _.RC:$src1, (_.VT _.RC:$src2)))]>,
12491 EVEX_4V, T8XD, Sched<[sched]>;
12493 def rm : I<0x68, MRMSrcMem,
12494 (outs _.KRPC:$dst),
12495 (ins _.RC:$src1, _.MemOp:$src2),
12496 !strconcat("vp2intersect", _.Suffix,
12497 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
12498 [(set _.KRPC:$dst, (X86vp2intersect
12499 _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>,
12500 EVEX_4V, T8XD, EVEX_CD8<_.EltSize, CD8VF>,
12501 Sched<[sched.Folded, sched.ReadAfterFold]>;
12503 def rmb : I<0x68, MRMSrcMem,
12504 (outs _.KRPC:$dst),
12505 (ins _.RC:$src1, _.ScalarMemOp:$src2),
12506 !strconcat("vp2intersect", _.Suffix, "\t{${src2}", _.BroadcastStr,
12507 ", $src1, $dst|$dst, $src1, ${src2}", _.BroadcastStr ,"}"),
12508 [(set _.KRPC:$dst, (X86vp2intersect
12509 _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))))]>,
12510 EVEX_4V, T8XD, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
12511 Sched<[sched.Folded, sched.ReadAfterFold]>;
// Width expansion and instantiation of vp2intersect for dword and qword
// elements (Z under VP2INTERSECT; Z256/Z128 additionally need VLX).
12514 multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
12515 let Predicates = [HasAVX512, HasVP2INTERSECT] in
12516 defm Z : avx512_vp2intersect_modes<sched.ZMM, _.info512>, EVEX_V512;
12518 let Predicates = [HasAVX512, HasVP2INTERSECT, HasVLX] in {
12519 defm Z256 : avx512_vp2intersect_modes<sched.YMM, _.info256>, EVEX_V256;
12520 defm Z128 : avx512_vp2intersect_modes<sched.XMM, _.info128>, EVEX_V128;
12524 defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
12525 defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
// Two-source binop whose destination type differs from the source type
// (used for vcvtne2ps2bf16: f32 sources, i16 destination). Expands across
// the three widths under the given predicate (+VLX for YMM/XMM).
12527 multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
12528 X86SchedWriteWidths sched,
12529 AVX512VLVectorVTInfo _SrcVTInfo,
12530 AVX512VLVectorVTInfo _DstVTInfo,
12531 SDNode OpNode, Predicate prd,
12532 bit IsCommutable = 0> {
12533 let Predicates = [prd] in
12534 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
12535 _SrcVTInfo.info512, _DstVTInfo.info512,
12536 _SrcVTInfo.info512, IsCommutable>,
12537 EVEX_V512, EVEX_CD8<32, CD8VF>;
12538 let Predicates = [HasVLX, prd] in {
12539 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
12540 _SrcVTInfo.info256, _DstVTInfo.info256,
12541 _SrcVTInfo.info256, IsCommutable>,
12542 EVEX_V256, EVEX_CD8<32, CD8VF>;
12543 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
12544 _SrcVTInfo.info128, _DstVTInfo.info128,
12545 _SrcVTInfo.info128, IsCommutable>,
12546 EVEX_V128, EVEX_CD8<32, CD8VF>;
// Convert two packed f32 vectors to one packed BF16 vector (non-commutable:
// operand order determines which half of the result each source fills).
12550 let ExeDomain = SSEPackedSingle in
12551 defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
12552 SchedWriteCvtPD2PS, //FIXME: Should be SchedWriteCvtPS2BF
12553 avx512vl_f32_info, avx512vl_i16_info,
12554 X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
12556 // Truncate Float to BFloat16
// Truncating f32 -> BF16 conversion. The Z128 form uses null_frag so its
// masked selection is handled by the explicit patterns below; Intel-syntax
// aliases disambiguate the x/y source-width suffixes.
// NOTE(review): content numbering skips 12563, 12568, 12572, 12575, 12578,
// 12584, 12588-12589 (likely EVEX_V512/EVEX_V128 tails, VR128X:$src alias
// operands, and closing braces, lost in extraction) -- confirm upstream.
12557 multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
12558 X86SchedWriteWidths sched> {
12559 let ExeDomain = SSEPackedSingle in {
12560 let Predicates = [HasBF16], Uses = []<Register>, mayRaiseFPException = 0 in {
12561 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
12562 X86cvtneps2bf16, X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
12564 let Predicates = [HasBF16, HasVLX] in {
12565 let Uses = []<Register>, mayRaiseFPException = 0 in {
12566 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
12567 null_frag, null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
12569 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
12570 X86cvtneps2bf16, X86cvtneps2bf16,
12571 sched.YMM, "{1to8}", "{y}">, EVEX_V256;
12573 } // Predicates = [HasBF16, HasVLX]
12574 } // ExeDomain = SSEPackedSingle
12576 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12577 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
12579 def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
12580 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
12581 f128mem:$src), 0, "intel">;
12582 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12583 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
12585 def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
12586 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
12587 f256mem:$src), 0, "intel">;
// Single instantiation of the truncating f32 -> BF16 conversion.
12590 defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
12591 SchedWriteCvtPD2PS>, T8XS,
12592 EVEX_CD8<32, CD8VF>;
// Explicit selection patterns for the 128-bit cvtneps2bf16, covering the
// unmasked, merge-masked (src0), and zero-masked forms for register, load,
// and broadcast-load sources. NOTE(review): content numbering skips 12600,
// 12603, 12605, etc. (likely VK4WM:$mask operand lines, lost in extraction).
12594 let Predicates = [HasBF16, HasVLX] in {
12595 // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
12596 // patterns have been disabled with null_frag.
12597 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
12598 (VCVTNEPS2BF16Z128rr VR128X:$src)>;
12599 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
12601 (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
12602 def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
12604 (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
12606 def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
12607 (VCVTNEPS2BF16Z128rm addr:$src)>;
12608 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
12610 (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12611 def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
12613 (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
12615 def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
12616 (X86VBroadcastld32 addr:$src)))),
12617 (VCVTNEPS2BF16Z128rmb addr:$src)>;
12618 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12619 (v8i16 VR128X:$src0), VK4WM:$mask),
12620 (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
12621 def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcastld32 addr:$src)),
12622 v8i16x_info.ImmAllZerosV, VK4WM:$mask),
12623 (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
// BF16 dot-product accumulate into f32 (vdpbf16ps): the accumulator (_.VT,
// f32) is tied to $dst while the multiplicands use the i32-typed src_v info.
// Register (r), memory (m), and embedded-broadcast (mb) forms.
12626 let Constraints = "$src1 = $dst" in {
12627 multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
12628 X86FoldableSchedWrite sched,
12629 X86VectorVTInfo _, X86VectorVTInfo src_v> {
12630 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
12631 (ins src_v.RC:$src2, src_v.RC:$src3),
12632 OpcodeStr, "$src3, $src2", "$src2, $src3",
12633 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2, src_v.RC:$src3))>,
12634 EVEX_4V, Sched<[sched]>;
12636 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12637 (ins src_v.RC:$src2, src_v.MemOp:$src3),
12638 OpcodeStr, "$src3, $src2", "$src2, $src3",
12639 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12640 (src_v.LdFrag addr:$src3)))>, EVEX_4V,
12641 Sched<[sched.Folded, sched.ReadAfterFold]>;
12643 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
12644 (ins src_v.RC:$src2, src_v.ScalarMemOp:$src3),
12646 !strconcat("${src3}", _.BroadcastStr,", $src2"),
12647 !strconcat("$src2, ${src3}", _.BroadcastStr),
12648 (_.VT (OpNode _.RC:$src1, src_v.RC:$src2,
12649 (src_v.VT (src_v.BroadcastLdFrag addr:$src3))))>,
12650 EVEX_B, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
12653 } // Constraints = "$src1 = $dst"
// Width expansion for vdpbf16ps (Z under prd; Z256/Z128 additionally under
// VLX), followed by its single instantiation under HasBF16.
12655 multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
12656 X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
12657 AVX512VLVectorVTInfo src_v, Predicate prd> {
12658 let Predicates = [prd] in {
12659 defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.ZMM, _.info512,
12660 src_v.info512>, EVEX_V512;
12662 let Predicates = [HasVLX, prd] in {
12663 defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.YMM, _.info256,
12664 src_v.info256>, EVEX_V256;
12665 defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, sched.XMM, _.info128,
12666 src_v.info128>, EVEX_V128;
12670 let ExeDomain = SSEPackedSingle in
12671 defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
12672 avx512vl_f32_info, avx512vl_i32_info,
12673 HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;