1 //===-- X86InstrAVX512.td - AVX512 Instruction Set ---------*- tablegen -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file describes the X86 AVX512 instruction set, defining the
11 // instructions, and properties of the instructions which are needed for code
12 // generation, machine code emission, and analysis.
14 //===----------------------------------------------------------------------===//
16 // Group template arguments that can be derived from the vector type (EltNum x
17 // EltVT). These are things like the register class for the writemask, etc.
18 // The idea is to pass one of these as the template argument rather than the
19 // individual arguments.
20 // The template is also used for scalar types, in this case numelts is 1.
21 class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
23 RegisterClass RC = rc;
24 ValueType EltVT = eltvt;
25 int NumElts = numelts;
27 // Corresponding mask register class.
28 RegisterClass KRC = !cast<RegisterClass>("VK" # NumElts);
30 // Corresponding write-mask register class.
31 RegisterClass KRCWM = !cast<RegisterClass>("VK" # NumElts # "WM");
// Mask value type with one i1 per element, e.g. v16i1 when NumElts is 16.
34 ValueType KVT = !cast<ValueType>("v" # NumElts # "i1");
36 // Suffix used in the instruction mnemonic.
37 string Suffix = suffix;
39 // VTName is a string name for vector VT. For vector types it will be
40 // v # NumElts # EltVT, so for vector of 8 elements of i32 it will be v8i32
41 // It is a little bit complex for scalar types, where NumElts = 1.
42 // In this case we build v4f32 or v2f64
43 string VTName = "v" # !if (!eq (NumElts, 1),
44 !if (!eq (EltVT.Size, 32), 4,
45 !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
// The value type record named by VTName.
48 ValueType VT = !cast<ValueType>(VTName);
// Element type spelled as a string, e.g. "i32" or "f64".
50 string EltTypeName = !cast<string>(EltVT);
51 // Size of the element type in bits, e.g. 32 for v16i32.
52 string EltSizeName = !subst("i", "", !subst("f", "", EltTypeName));
53 int EltSize = EltVT.Size;
55 // "i" for integer types and "f" for floating-point types
56 string TypeVariantName = !subst(EltSizeName, "", EltTypeName);
58 // Size of RC in bits, e.g. 512 for VR512.
61 // The corresponding memory operand, e.g. i512mem for VR512.
62 X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
63 X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
64 // FP scalar memory operand for intrinsics - ssmem/sdmem.
65 Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
66 !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
69 // Note: For 128/256-bit integer VT we choose loadv2i64/loadv4i64
70 // due to load promotion during legalization
71 PatFrag LdFrag = !cast<PatFrag>("load" #
72 !if (!eq (TypeVariantName, "i"),
73 !if (!eq (Size, 128), "v2i64",
74 !if (!eq (Size, 256), "v4i64",
75 !if (!eq (Size, 512), "v8i64",
// Aligned counterpart of LdFrag; the name is built from "alignedload" with
// the same integer-type substitutions.
78 PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
79 !if (!eq (TypeVariantName, "i"),
80 !if (!eq (Size, 128), "v2i64",
81 !if (!eq (Size, 256), "v4i64",
82 !if (!eq (Size, 512), "v8i64",
// Load fragment for a single element, named "load" # EltVT (e.g. loadf32).
85 PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
// Complex pattern for scalar FP loads: sse_load_f32 / sse_load_f64.
87 ComplexPattern ScalarIntMemCPat = !if (!eq (EltTypeName, "f32"),
88 !cast<ComplexPattern>("sse_load_f32"),
89 !if (!eq (EltTypeName, "f64"),
90 !cast<ComplexPattern>("sse_load_f64"),
93 // The corresponding float type, e.g. v16f32 for v16i32
94 // Note: For EltSize < 32, FloatVT is illegal and TableGen
95 // fails to compile, so we choose FloatVT = VT
96 ValueType FloatVT = !cast<ValueType>(
97 !if (!eq (!srl(EltSize,5),0),
99 !if (!eq(TypeVariantName, "i"),
100 "v" # NumElts # "f" # EltSize,
// The corresponding integer type; mirrors FloatVT, including the
// EltSize < 32 special case tested by !srl(EltSize,5) == 0.
103 ValueType IntVT = !cast<ValueType>(
104 !if (!eq (!srl(EltSize,5),0),
106 !if (!eq(TypeVariantName, "f"),
107 "v" # NumElts # "i" # EltSize,
109 // The string to specify embedded broadcast in assembly.
110 string BroadcastStr = "{1to" # NumElts # "}";
112 // 8-bit compressed displacement tuple/subvector format. This is only
113 // defined for NumElts <= 8.
114 CD8VForm CD8TupleForm = !if (!eq (!srl(NumElts, 4), 0),
115 !cast<CD8VForm>("CD8VT" # NumElts), ?);
// Sub-register index: sub_xmm for a 128-bit RC, sub_ymm for a 256-bit RC,
// unset (?) for any other size.
117 SubRegIndex SubRegIdx = !if (!eq (Size, 128), sub_xmm,
118 !if (!eq (Size, 256), sub_ymm, ?));
120 Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
121 !if (!eq (EltTypeName, "f64"), SSEPackedDouble,
// Scalar FP register class: FR32X for f32 elements, FR64X for all others.
124 RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
126 // A vector type of the same width with element type i64. This is used to
127 // create patterns for logic ops.
128 ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
130 // A vector type of the same width with element type i32. This is used to
131 // create the canonical constant zero node ImmAllZerosV.
132 ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
133 dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
// Name suffix derived from the vector width: "Z128" for 128 bits, "Z256"
// for 256 bits, and "Z" for everything else.
135 string ZSuffix = !if (!eq (Size, 128), "Z128",
136 !if (!eq (Size, 256), "Z256", "Z"));
// 512-bit vector types (RC = VR512).
139 def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
140 def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
141 def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
142 def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
143 def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
144 def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
146 // "x" in v32i8x_info means RC = VR256X
147 def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
148 def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
149 def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
150 def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
151 def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
152 def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
// 128-bit vector types (RC = VR128X).
154 def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
155 def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
156 def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
157 def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
158 def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
159 def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
161 // We map scalar types to the smallest (128-bit) vector type
162 // with the appropriate element type. This allows the same masking logic to
// be used. Note that the integer scalar infos below use GR32/GR64 as RC,
// while the FP scalar infos use VR128X.
163 def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
164 def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
165 def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
166 def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
// Groups three X86VectorVTInfo records under one record, exposed as the
// fields info512, info256 and info128.
168 class AVX512VLVectorVTInfo<X86VectorVTInfo i512, X86VectorVTInfo i256,
169 X86VectorVTInfo i128> {
170 X86VectorVTInfo info512 = i512;
171 X86VectorVTInfo info256 = i256;
172 X86VectorVTInfo info128 = i128;
// One AVX512VLVectorVTInfo per element type (i8, i16, i32, i64, f32, f64).
// The first two arguments of each def are the VR512 and VR256X infos for
// that element type.
175 def avx512vl_i8_info : AVX512VLVectorVTInfo<v64i8_info, v32i8x_info,
177 def avx512vl_i16_info : AVX512VLVectorVTInfo<v32i16_info, v16i16x_info,
179 def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
181 def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
183 def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
185 def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
// Describes a mask (vNi1) type: KRC is its mask register class and KRCWM
// its write-mask register class.
188 class X86KVectorVTInfo<RegisterClass _krc, RegisterClass _krcwm,
190 RegisterClass KRC = _krc;
191 RegisterClass KRCWM = _krcwm;
// Mask-type infos for 2, 4, 8, 16, 32 and 64 elements; each pairs VKn with
// VKnWM and the matching vNi1 value type.
195 def v2i1_info : X86KVectorVTInfo<VK2, VK2WM, v2i1>;
196 def v4i1_info : X86KVectorVTInfo<VK4, VK4WM, v4i1>;
197 def v8i1_info : X86KVectorVTInfo<VK8, VK8WM, v8i1>;
198 def v16i1_info : X86KVectorVTInfo<VK16, VK16WM, v16i1>;
199 def v32i1_info : X86KVectorVTInfo<VK32, VK32WM, v32i1>;
200 def v64i1_info : X86KVectorVTInfo<VK64, VK64WM, v64i1>;
202 // This multiclass generates the masking variants from the non-masking
203 // variant. It only provides the assembly pieces for the masking variants.
204 // It assumes custom ISel patterns for masking which can be provided as
205 // template arguments.
// IsCommutable sets isCommutable on the unmasked (NAME) and zero-masked
// (NAME#kz) defs; IsKCommutable sets it on the masked (NAME#k) def.
206 multiclass AVX512_maskable_custom<bits<8> O, Format F,
208 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
210 string AttSrcAsm, string IntelSrcAsm,
212 list<dag> MaskingPattern,
213 list<dag> ZeroMaskingPattern,
214 string MaskingConstraint = "",
215 InstrItinClass itin = NoItinerary,
216 bit IsCommutable = 0,
217 bit IsKCommutable = 0> {
// Unmasked variant.
218 let isCommutable = IsCommutable in
219 def NAME: AVX512<O, F, Outs, Ins,
220 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
221 "$dst, "#IntelSrcAsm#"}",
// Masked variant: takes MaskingIns and prints "{${mask}}" after $dst.
224 // Prefer over VMOV*rrk Pat<>
225 let isCommutable = IsKCommutable in
226 def NAME#k: AVX512<O, F, Outs, MaskingIns,
227 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
228 "$dst {${mask}}, "#IntelSrcAsm#"}",
229 MaskingPattern, itin>,
231 // In case of the 3src subclass this is overridden with a let.
232 string Constraints = MaskingConstraint;
// Zero-masked variant: takes ZeroMaskingIns and prints "{${mask}} {z}".
235 // Zero mask does not add any restrictions to commute operands transformation.
236 // So, it is Ok to use IsCommutable instead of IsKCommutable.
237 let isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
238 def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
239 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
240 "$dst {${mask}} {z}, "#IntelSrcAsm#"}",
247 // Common base class of AVX512_maskable and AVX512_maskable_3src.
// Wraps RHS and MaskingRHS in "set _.RC:$dst" patterns and forwards them to
// AVX512_maskable_custom. The zero-masking pattern selects between RHS and
// _.ImmAllZerosV under _.KRCWM:$mask using the Select node.
// NOTE(review): `itin` is declared below, but the visible forwarding line
// passes NoItinerary instead of `itin` -- confirm whether that is intended.
248 multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
250 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
252 string AttSrcAsm, string IntelSrcAsm,
253 dag RHS, dag MaskingRHS,
254 SDNode Select = vselect,
255 string MaskingConstraint = "",
256 InstrItinClass itin = NoItinerary,
257 bit IsCommutable = 0,
258 bit IsKCommutable = 0> :
259 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
260 AttSrcAsm, IntelSrcAsm,
261 [(set _.RC:$dst, RHS)],
262 [(set _.RC:$dst, MaskingRHS)],
264 (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
265 MaskingConstraint, NoItinerary, IsCommutable,
268 // Similar to AVX512_maskable_common, but with scalar types.
// Unlike AVX512_maskable_common, this multiclass takes no RHS/MaskingRHS
// dags; it forwards the operand lists and asm strings to
// AVX512_maskable_custom.
// NOTE(review): `itin` is declared below, but the visible forwarding line
// passes NoItinerary instead of `itin` -- confirm whether that is intended.
269 multiclass AVX512_maskable_fp_common<bits<8> O, Format F, X86VectorVTInfo _,
271 dag Ins, dag MaskingIns, dag ZeroMaskingIns,
273 string AttSrcAsm, string IntelSrcAsm,
274 SDNode Select = vselect,
275 string MaskingConstraint = "",
276 InstrItinClass itin = NoItinerary,
277 bit IsCommutable = 0,
278 bit IsKCommutable = 0> :
279 AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
280 AttSrcAsm, IntelSrcAsm,
282 MaskingConstraint, NoItinerary, IsCommutable,
285 // This multiclass generates the unconditional/non-masking, the masking and
286 // the zero-masking variant of the vector instruction. In the masking case, the
287 // preserved vector elements come from a new dummy input operand tied to $dst.
// The masked operand list is (_.RC:$src0, _.KRCWM:$mask) followed by Ins,
// the zero-masked list is (_.KRCWM:$mask) followed by Ins, and the masked
// pattern is (Select mask, RHS, $src0) under the "$src0 = $dst" constraint.
288 multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
289 dag Outs, dag Ins, string OpcodeStr,
290 string AttSrcAsm, string IntelSrcAsm,
292 InstrItinClass itin = NoItinerary,
293 bit IsCommutable = 0, bit IsKCommutable = 0,
294 SDNode Select = vselect> :
295 AVX512_maskable_common<O, F, _, Outs, Ins,
296 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
297 !con((ins _.KRCWM:$mask), Ins),
298 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
299 (Select _.KRCWM:$mask, RHS, _.RC:$src0), Select,
300 "$src0 = $dst", itin, IsCommutable, IsKCommutable>;
302 // This multiclass generates the unconditional/non-masking, the masking and
303 // the zero-masking variant of the scalar instruction.
// Same operand layout as AVX512_maskable, but the select node is fixed to
// X86selects and there is no IsKCommutable parameter.
304 multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
305 dag Outs, dag Ins, string OpcodeStr,
306 string AttSrcAsm, string IntelSrcAsm,
308 InstrItinClass itin = NoItinerary,
309 bit IsCommutable = 0> :
310 AVX512_maskable_common<O, F, _, Outs, Ins,
311 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
312 !con((ins _.KRCWM:$mask), Ins),
313 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
314 (X86selects _.KRCWM:$mask, RHS, _.RC:$src0),
315 X86selects, "$src0 = $dst", itin, IsCommutable>;
317 // Similar to AVX512_maskable but in this case one of the source operands
318 // ($src1) is already tied to $dst so we just use that for the preserved
319 // vector elements. NOTE that the NonTiedIns (the ins dag) should exclude
// $src1, because _.RC:$src1 is prepended to NonTiedIns below for all three
// variants. An empty masking constraint and NoItinerary are passed on.
321 multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
322 dag Outs, dag NonTiedIns, string OpcodeStr,
323 string AttSrcAsm, string IntelSrcAsm,
324 dag RHS, bit IsCommutable = 0,
325 bit IsKCommutable = 0> :
326 AVX512_maskable_common<O, F, _, Outs,
327 !con((ins _.RC:$src1), NonTiedIns),
328 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
329 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
330 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
331 (vselect _.KRCWM:$mask, RHS, _.RC:$src1),
332 vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
// Scalar counterpart of AVX512_maskable_3src: the operand lists are built
// the same way (with _.RC:$src1 prepended to NonTiedIns), but X86selects is
// used in place of vselect.
334 multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
335 dag Outs, dag NonTiedIns, string OpcodeStr,
336 string AttSrcAsm, string IntelSrcAsm,
337 dag RHS, bit IsCommutable = 0,
338 bit IsKCommutable = 0> :
339 AVX512_maskable_common<O, F, _, Outs,
340 !con((ins _.RC:$src1), NonTiedIns),
341 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
342 !con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
343 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
344 (X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
345 X86selects, "", NoItinerary, IsCommutable,
// Generates the unmasked, masked and zero-masked variants through
// AVX512_maskable_custom. Only the unmasked variant receives an ISel
// pattern (Pattern); the masked and zero-masked variants get empty pattern
// lists.
348 multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
351 string AttSrcAsm, string IntelSrcAsm,
353 AVX512_maskable_custom<O, F, Outs, Ins,
354 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
355 !con((ins _.KRCWM:$mask), Ins),
356 OpcodeStr, AttSrcAsm, IntelSrcAsm, Pattern, [], [],
360 // Instruction with mask that puts result in mask register,
361 // like "compare" and "vptest"
// Both defs below use NoItinerary. Only the unmasked def is affected by
// IsCommutable.
362 multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
364 dag Ins, dag MaskingIns,
366 string AttSrcAsm, string IntelSrcAsm,
368 list<dag> MaskingPattern,
369 bit IsCommutable = 0> {
// Unmasked variant.
370 let isCommutable = IsCommutable in
371 def NAME: AVX512<O, F, Outs, Ins,
372 OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
373 "$dst, "#IntelSrcAsm#"}",
374 Pattern, NoItinerary>;
// Masked variant (EVEX_K): takes MaskingIns and prints "{${mask}}".
376 def NAME#k: AVX512<O, F, Outs, MaskingIns,
377 OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
378 "$dst {${mask}}, "#IntelSrcAsm#"}",
379 MaskingPattern, NoItinerary>, EVEX_K;
// Wraps RHS and MaskingRHS in patterns that set the mask-register result
// _.KRC:$dst and forwards them to AVX512_maskable_custom_cmp.
382 multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
384 dag Ins, dag MaskingIns,
386 string AttSrcAsm, string IntelSrcAsm,
387 dag RHS, dag MaskingRHS,
388 bit IsCommutable = 0> :
389 AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
390 AttSrcAsm, IntelSrcAsm,
391 [(set _.KRC:$dst, RHS)],
392 [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
// Mask-producing instruction with an unmasked and a masked variant. The
// masked operand list is (_.KRCWM:$mask) followed by Ins, and the masked
// pattern is the AND of the write-mask with RHS.
394 multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
395 dag Outs, dag Ins, string OpcodeStr,
396 string AttSrcAsm, string IntelSrcAsm,
397 dag RHS, bit IsCommutable = 0> :
398 AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
399 !con((ins _.KRCWM:$mask), Ins),
400 OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
401 (and _.KRCWM:$mask, RHS), IsCommutable>;
// Same operand lists and asm strings as AVX512_maskable_cmp, but with empty
// ISel pattern lists for both the unmasked and the masked variant.
403 multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
404 dag Outs, dag Ins, string OpcodeStr,
405 string AttSrcAsm, string IntelSrcAsm> :
406 AVX512_maskable_custom_cmp<O, F, Outs,
407 Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
408 AttSrcAsm, IntelSrcAsm, [],[]>;
410 // This multiclass generates the unconditional/non-masking, the masking and
411 // the zero-masking variant of the vector instruction. In the masking case, the
412 // preserved vector elements come from a new dummy input operand tied to $dst.
// Unlike AVX512_maskable, the unmasked pattern uses RHS while the patterns
// under Select use a separate MaskedRHS dag.
413 multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
414 dag Outs, dag Ins, string OpcodeStr,
415 string AttSrcAsm, string IntelSrcAsm,
416 dag RHS, dag MaskedRHS,
417 InstrItinClass itin = NoItinerary,
418 bit IsCommutable = 0, SDNode Select = vselect> :
419 AVX512_maskable_custom<O, F, Outs, Ins,
420 !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
421 !con((ins _.KRCWM:$mask), Ins),
422 OpcodeStr, AttSrcAsm, IntelSrcAsm,
423 [(set _.RC:$dst, RHS)],
425 (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
427 (Select _.KRCWM:$mask, MaskedRHS,
429 "$src0 = $dst", itin, IsCommutable>;
431 // Bitcasts between 512-bit vector types. Return the original type since
432 // no instruction is needed for the conversion.
// There is exactly one pattern per ordered pair of distinct types among
// v8f64, v16f32, v8i64, v16i32, v32i16 and v64i8 (30 patterns in total).
433 def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
434 def : Pat<(v8f64 (bitconvert (v16i32 VR512:$src))), (v8f64 VR512:$src)>;
435 def : Pat<(v8f64 (bitconvert (v32i16 VR512:$src))), (v8f64 VR512:$src)>;
436 def : Pat<(v8f64 (bitconvert (v64i8 VR512:$src))), (v8f64 VR512:$src)>;
437 def : Pat<(v8f64 (bitconvert (v16f32 VR512:$src))), (v8f64 VR512:$src)>;
438 def : Pat<(v16f32 (bitconvert (v8i64 VR512:$src))), (v16f32 VR512:$src)>;
439 def : Pat<(v16f32 (bitconvert (v16i32 VR512:$src))), (v16f32 VR512:$src)>;
440 def : Pat<(v16f32 (bitconvert (v32i16 VR512:$src))), (v16f32 VR512:$src)>;
441 def : Pat<(v16f32 (bitconvert (v64i8 VR512:$src))), (v16f32 VR512:$src)>;
442 def : Pat<(v16f32 (bitconvert (v8f64 VR512:$src))), (v16f32 VR512:$src)>;
443 def : Pat<(v8i64 (bitconvert (v16i32 VR512:$src))), (v8i64 VR512:$src)>;
444 def : Pat<(v8i64 (bitconvert (v32i16 VR512:$src))), (v8i64 VR512:$src)>;
445 def : Pat<(v8i64 (bitconvert (v64i8 VR512:$src))), (v8i64 VR512:$src)>;
446 def : Pat<(v8i64 (bitconvert (v8f64 VR512:$src))), (v8i64 VR512:$src)>;
447 def : Pat<(v8i64 (bitconvert (v16f32 VR512:$src))), (v8i64 VR512:$src)>;
448 def : Pat<(v16i32 (bitconvert (v8i64 VR512:$src))), (v16i32 VR512:$src)>;
449 def : Pat<(v16i32 (bitconvert (v16f32 VR512:$src))), (v16i32 VR512:$src)>;
450 def : Pat<(v16i32 (bitconvert (v32i16 VR512:$src))), (v16i32 VR512:$src)>;
451 def : Pat<(v16i32 (bitconvert (v64i8 VR512:$src))), (v16i32 VR512:$src)>;
452 def : Pat<(v16i32 (bitconvert (v8f64 VR512:$src))), (v16i32 VR512:$src)>;
453 def : Pat<(v32i16 (bitconvert (v8i64 VR512:$src))), (v32i16 VR512:$src)>;
454 def : Pat<(v32i16 (bitconvert (v16i32 VR512:$src))), (v32i16 VR512:$src)>;
455 def : Pat<(v32i16 (bitconvert (v64i8 VR512:$src))), (v32i16 VR512:$src)>;
456 def : Pat<(v32i16 (bitconvert (v8f64 VR512:$src))), (v32i16 VR512:$src)>;
457 def : Pat<(v32i16 (bitconvert (v16f32 VR512:$src))), (v32i16 VR512:$src)>;
459 def : Pat<(v64i8 (bitconvert (v8i64 VR512:$src))), (v64i8 VR512:$src)>;
460 def : Pat<(v64i8 (bitconvert (v16i32 VR512:$src))), (v64i8 VR512:$src)>;
461 def : Pat<(v64i8 (bitconvert (v32i16 VR512:$src))), (v64i8 VR512:$src)>;
462 def : Pat<(v64i8 (bitconvert (v8f64 VR512:$src))), (v64i8 VR512:$src)>;
463 def : Pat<(v64i8 (bitconvert (v16f32 VR512:$src))), (v64i8 VR512:$src)>;
465 // Alias instruction that maps zero vector to pxor / xorp* for AVX-512.
466 // This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
467 // swizzled by ExecutionDepsFix to pxor.
468 // We set canFoldAsLoad because this can be converted to a constant-pool
469 // load of an all-zeros value if folding it would be beneficial.
470 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
471 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
// Matches a v16i32 all-zeros vector in VR512.
472 def AVX512_512_SET0 : I<0, Pseudo, (outs VR512:$dst), (ins), "",
473 [(set VR512:$dst, (v16i32 immAllZerosV))]>;
// Matches a v16i32 all-ones vector in VR512.
474 def AVX512_512_SETALLONES : I<0, Pseudo, (outs VR512:$dst), (ins), "",
475 [(set VR512:$dst, (v16i32 immAllOnesV))]>;
478 // Alias instructions that allow VPTERNLOG to be used with a mask to create
479 // a mix of all ones and all zeros elements. This is done this way to force
480 // the same register to be used as input for all three sources.
481 let isPseudo = 1, Predicates = [HasAVX512] in {
// 16 x i32: vselect between all-ones and all-zeros under a v16i1 mask.
482 def AVX512_512_SEXT_MASK_32 : I<0, Pseudo, (outs VR512:$dst),
483 (ins VK16WM:$mask), "",
484 [(set VR512:$dst, (vselect (v16i1 VK16WM:$mask),
485 (v16i32 immAllOnesV),
486 (v16i32 immAllZerosV)))]>;
// 8 x i64: same selection under a v8i1 mask, with the v16i32 constants
// bitcast to v8i64.
487 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
488 (ins VK8WM:$mask), "",
489 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
490 (bc_v8i64 (v16i32 immAllOnesV)),
491 (bc_v8i64 (v16i32 immAllZerosV))))]>;
// Zero-vector pseudos for the 128-bit (VR128X, v4i32) and 256-bit (VR256X,
// v8i32) register classes, with the same flags as AVX512_512_SET0.
494 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
495 isPseudo = 1, Predicates = [HasAVX512], SchedRW = [WriteZero] in {
496 def AVX512_128_SET0 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
497 [(set VR128X:$dst, (v4i32 immAllZerosV))]>;
498 def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
499 [(set VR256X:$dst, (v8i32 immAllZerosV))]>;
502 // Alias instructions that map fld0 to xorps for sse or vxorps for avx.
503 // This is expanded by ExpandPostRAPseudos.
504 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
505 isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasAVX512] in {
// Scalar single-precision zero (fp32imm0) in FR32X.
506 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
507 [(set FR32X:$dst, fp32imm0)]>;
// Scalar double-precision zero (fpimm0) in FR64X.
508 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
509 [(set FR64X:$dst, fpimm0)]>;
512 //===----------------------------------------------------------------------===//
513 // AVX-512 - VECTOR INSERT
// Defines the register-source (rr) and memory-source (rm) forms of an
// instruction that inserts a From-typed subvector into a To-typed vector.
// The mnemonic is "vinsert" # From.EltTypeName # "x" # From.NumElts, and
// both forms go through AVX512_maskable on the To type.
515 multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
516 PatFrag vinsert_insert> {
517 let ExeDomain = To.ExeDomain in {
// Register form: $src2 is a From.RC register.
518 defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
519 (ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
520 "vinsert" # From.EltTypeName # "x" # From.NumElts,
521 "$src3, $src2, $src1", "$src1, $src2, $src3",
522 (vinsert_insert:$src3 (To.VT To.RC:$src1),
523 (From.VT From.RC:$src2),
524 (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
// Memory form: $src2 is loaded through From.LdFrag and bitconverted to
// From.VT.
526 defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
527 (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
528 "vinsert" # From.EltTypeName # "x" # From.NumElts,
529 "$src3, $src2, $src1", "$src1, $src2, $src3",
530 (vinsert_insert:$src3 (To.VT To.RC:$src1),
531 (From.VT (bitconvert (From.LdFrag addr:$src2))),
532 (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
533 EVEX_CD8<From.EltSize, From.CD8TupleForm>;
// Adds ISel patterns, guarded by the predicate list p, that select an
// already-defined instruction for a vinsert_insert on the given From/To
// types. The instruction is looked up by name as InstrStr#"rr" for a
// register source and InstrStr#"rm" for a loaded source, and the immediate
// is produced by INSERT_get_vinsert_imm.
537 multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
538 X86VectorVTInfo To, PatFrag vinsert_insert,
539 SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
540 let Predicates = p in {
541 def : Pat<(vinsert_insert:$ins
542 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
543 (To.VT (!cast<Instruction>(InstrStr#"rr")
544 To.RC:$src1, From.RC:$src2,
545 (INSERT_get_vinsert_imm To.RC:$ins)))>;
547 def : Pat<(vinsert_insert:$ins
549 (From.VT (bitconvert (From.LdFrag addr:$src2))),
551 (To.VT (!cast<Instruction>(InstrStr#"rm")
552 To.RC:$src1, addr:$src2,
553 (INSERT_get_vinsert_imm To.RC:$ins)))>;
// Instantiates vinsert_for_size for one 32-bit element type (EltVT32) and
// one 64-bit element type (EltVT64). Opcode128 is used for 128-bit
// subvector inserts and Opcode256 for 256-bit subvector inserts.
557 multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
558 ValueType EltVT64, int Opcode256> {
// 4 x 32-bit into a 256-bit vector; requires HasVLX.
560 let Predicates = [HasVLX] in
561 defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
562 X86VectorVTInfo< 4, EltVT32, VR128X>,
563 X86VectorVTInfo< 8, EltVT32, VR256X>,
564 vinsert128_insert>, EVEX_V256;
// 4 x 32-bit into a 512-bit vector.
566 defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
567 X86VectorVTInfo< 4, EltVT32, VR128X>,
568 X86VectorVTInfo<16, EltVT32, VR512>,
569 vinsert128_insert>, EVEX_V512;
// 4 x 64-bit into a 512-bit vector.
571 defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
572 X86VectorVTInfo< 4, EltVT64, VR256X>,
573 X86VectorVTInfo< 8, EltVT64, VR512>,
574 vinsert256_insert>, VEX_W, EVEX_V512;
// 2 x 64-bit into a 256-bit vector; requires HasVLX and HasDQI.
576 let Predicates = [HasVLX, HasDQI] in
577 defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
578 X86VectorVTInfo< 2, EltVT64, VR128X>,
579 X86VectorVTInfo< 4, EltVT64, VR256X>,
580 vinsert128_insert>, VEX_W, EVEX_V256;
// 2 x 64-bit and 8 x 32-bit into a 512-bit vector; both require HasDQI.
582 let Predicates = [HasDQI] in {
583 defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
584 X86VectorVTInfo< 2, EltVT64, VR128X>,
585 X86VectorVTInfo< 8, EltVT64, VR512>,
586 vinsert128_insert>, VEX_W, EVEX_V512;
588 defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
589 X86VectorVTInfo< 8, EltVT32, VR256X>,
590 X86VectorVTInfo<16, EltVT32, VR512>,
591 vinsert256_insert>, EVEX_V512;
// VINSERTF is instantiated for f32/f64 elements (opcodes 0x18 / 0x1a),
// VINSERTI for i32/i64 elements (opcodes 0x38 / 0x3a).
595 defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
596 defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
598 // Codegen pattern with the alternative types,
599 // Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
// 64-bit-element inserts of a 128-bit subvector are mapped onto the 32x4
// instructions, and 32-bit-element inserts of a 256-bit subvector onto the
// 64x4 instructions; all of these are guarded by NoDQI.
600 defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
601 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
602 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
603 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
605 defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
606 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
607 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
608 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
610 defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
611 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
612 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
613 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
615 // Codegen pattern with the alternative types insert VEC128 into VEC256
// (i16 and i8 element types reuse the integer 32x4 / 64x4 instructions.)
616 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
617 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
618 defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
619 vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
620 // Codegen pattern with the alternative types insert VEC128 into VEC512
621 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
622 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
623 defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
624 vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
625 // Codegen pattern with the alternative types insert VEC256 into VEC512
626 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
627 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
628 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
629 vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
631 // vinsertps - insert f32 to XMM
632 let ExeDomain = SSEPackedSingle in {
// Register form: both sources are VR128X registers.
633 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
634 (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
635 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
636 [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
// Memory form: $src2 is an f32 load wrapped in scalar_to_vector as v4f32.
638 def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
639 (ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
640 "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
641 [(set VR128X:$dst, (X86insertps VR128X:$src1,
642 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
643 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
646 //===----------------------------------------------------------------------===//
647 // AVX-512 VECTOR EXTRACT
// Defines the forms of an instruction that extracts a To-typed subvector
// from a From-typed vector: register destination (rr, plus masked variants
// via AVX512_maskable_in_asm), store (mr) and masked store (mrk). The
// mnemonic is "vextract" # To.EltTypeName # "x" # To.NumElts.
650 multiclass vextract_for_size<int Opcode,
651 X86VectorVTInfo From, X86VectorVTInfo To,
652 PatFrag vextract_extract,
653 SDNodeXForm EXTRACT_get_vextract_imm> {
655 let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
656 // use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
657 // vextract_extract); we are interested only in patterns without a mask,
658 // the intrinsic patterns are matched by the patterns generated below.
659 defm rr : AVX512_maskable_in_asm<Opcode, MRMDestReg, To, (outs To.RC:$dst),
660 (ins From.RC:$src1, u8imm:$idx),
661 "vextract" # To.EltTypeName # "x" # To.NumElts,
662 "$idx, $src1", "$src1, $idx",
663 [(set To.RC:$dst, (vextract_extract:$idx (From.VT From.RC:$src1),
665 AVX512AIi8Base, EVEX;
// Store form: the extracted subvector is written to To.MemOp:$dst.
666 def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
667 (ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
668 "vextract" # To.EltTypeName # "x" # To.NumElts #
669 "\t{$idx, $src1, $dst|$dst, $src1, $idx}",
670 [(store (To.VT (vextract_extract:$idx
671 (From.VT From.RC:$src1), (iPTR imm))),
// Masked store form: adds a To.KRCWM:$mask operand.
674 let mayStore = 1, hasSideEffects = 0 in
675 def mrk : AVX512AIi8<Opcode, MRMDestMem, (outs),
676 (ins To.MemOp:$dst, To.KRCWM:$mask,
677 From.RC:$src1, u8imm:$idx),
678 "vextract" # To.EltTypeName # "x" # To.NumElts #
679 "\t{$idx, $src1, $dst {${mask}}|"
680 "$dst {${mask}}, $src1, $idx}",
// A vselect of the extract under $mask selects the "rrk" instruction, whose
// name is rebuilt from NAME, To.EltSize, To.NumElts and From.ZSuffix.
684 def : Pat<(To.VT (vselect To.KRCWM:$mask,
685 (vextract_extract:$ext (From.VT From.RC:$src1),
688 (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
689 From.ZSuffix # "rrk")
690 To.RC:$src0, To.KRCWM:$mask, From.RC:$src1,
691 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Same as above but selecting the "rrkz" instruction, which takes no $src0.
693 def : Pat<(To.VT (vselect To.KRCWM:$mask,
694 (vextract_extract:$ext (From.VT From.RC:$src1),
697 (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
698 From.ZSuffix # "rrkz")
699 To.KRCWM:$mask, From.RC:$src1,
700 (EXTRACT_get_vextract_imm To.RC:$ext))>;
703 // Codegen pattern for the alternative types
// Under the predicate list p, selects InstrStr#"rr" for a register-result
// vextract_extract and InstrStr#"mr" when the extracted value is stored.
// The immediate is produced by EXTRACT_get_vextract_imm.
704 multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
705 X86VectorVTInfo To, PatFrag vextract_extract,
706 SDNodeXForm EXTRACT_get_vextract_imm, list<Predicate> p> {
707 let Predicates = p in {
708 def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)),
709 (To.VT (!cast<Instruction>(InstrStr#"rr")
711 (EXTRACT_get_vextract_imm To.RC:$ext)))>;
712 def : Pat<(store (To.VT (vextract_extract:$ext (From.VT From.RC:$src1),
713 (iPTR imm))), addr:$dst),
714 (!cast<Instruction>(InstrStr#"mr") addr:$dst, From.RC:$src1,
715 (EXTRACT_get_vextract_imm To.RC:$ext))>;
// Instantiates vextract_for_size for one 32-bit element type (EltVT32) and
// one 64-bit element type (EltVT64). Opcode128 is used for 128-bit
// subvector extracts and Opcode256 for 256-bit subvector extracts.
719 multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
720 ValueType EltVT64, int Opcode256> {
// 4 x 32-bit from a 512-bit vector.
721 defm NAME # "32x4Z" : vextract_for_size<Opcode128,
722 X86VectorVTInfo<16, EltVT32, VR512>,
723 X86VectorVTInfo< 4, EltVT32, VR128X>,
725 EXTRACT_get_vextract128_imm>,
726 EVEX_V512, EVEX_CD8<32, CD8VT4>;
// 4 x 64-bit from a 512-bit vector.
727 defm NAME # "64x4Z" : vextract_for_size<Opcode256,
728 X86VectorVTInfo< 8, EltVT64, VR512>,
729 X86VectorVTInfo< 4, EltVT64, VR256X>,
731 EXTRACT_get_vextract256_imm>,
732 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
// 4 x 32-bit from a 256-bit vector; requires HasVLX.
733 let Predicates = [HasVLX] in
734 defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
735 X86VectorVTInfo< 8, EltVT32, VR256X>,
736 X86VectorVTInfo< 4, EltVT32, VR128X>,
738 EXTRACT_get_vextract128_imm>,
739 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// 2 x 64-bit from a 256-bit vector; requires HasVLX and HasDQI.
740 let Predicates = [HasVLX, HasDQI] in
741 defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
742 X86VectorVTInfo< 4, EltVT64, VR256X>,
743 X86VectorVTInfo< 2, EltVT64, VR128X>,
745 EXTRACT_get_vextract128_imm>,
746 VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
// 2 x 64-bit and 8 x 32-bit from a 512-bit vector; both require HasDQI.
747 let Predicates = [HasDQI] in {
748 defm NAME # "64x2Z" : vextract_for_size<Opcode128,
749 X86VectorVTInfo< 8, EltVT64, VR512>,
750 X86VectorVTInfo< 2, EltVT64, VR128X>,
752 EXTRACT_get_vextract128_imm>,
753 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
754 defm NAME # "32x8Z" : vextract_for_size<Opcode256,
755 X86VectorVTInfo<16, EltVT32, VR512>,
756 X86VectorVTInfo< 8, EltVT32, VR256X>,
758 EXTRACT_get_vextract256_imm>,
759 EVEX_V512, EVEX_CD8<32, CD8VT8>;
763 defm VEXTRACTF : vextract_for_type<f32, 0x19, f64, 0x1b>;
764 defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
766 // extract_subvector codegen patterns with the alternative types.
767 // Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
// 128-bit extracts from 512-bit vectors of 64-bit elements reuse the 32x4
// instruction names (predicated on HasAVX512 and NoDQI).
768 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
769 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
770 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
771 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
// 256-bit extracts from 512-bit vectors of 32-bit elements reuse the 64x4
// instruction names (predicated on HasAVX512 and NoDQI).
773 defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
774 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
775 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
776 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
// 128-bit extracts from 256-bit vectors of 64-bit elements reuse the 32x4
// Z256 instruction names (predicated on HasVLX and NoDQI).
778 defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
779 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
780 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
781 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
// The i16 and i8 vector types below are all mapped onto the integer
// VEXTRACTI32x4 / VEXTRACTI64x4 instruction names; unlike the 64x2/32x8
// fallbacks, these entries carry no NoDQI predicate.
783 // Codegen pattern with the alternative types extract VEC128 from VEC256
784 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
785 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
786 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
787 vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
789 // Codegen pattern with the alternative types extract VEC128 from VEC512
790 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
791 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
792 defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
793 vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
794 // Codegen pattern with the alternative types extract VEC256 from VEC512
795 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
796 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
797 defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
798 vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
800 // A 128-bit subvector extract from the lowest position (index 0) of a
801 // 512-bit vector is a subregister copy (sub_xmm) that needs no instruction.
802 def : Pat<(v2i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
803 (v2i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
804 def : Pat<(v2f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
805 (v2f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
806 def : Pat<(v4i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
807 (v4i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
808 def : Pat<(v4f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
809 (v4f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
810 def : Pat<(v8i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
811 (v8i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_xmm))>;
812 def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
813 (v16i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_xmm))>;
815 // A 256-bit subvector extract from the lowest position (index 0) of a
816 // 512-bit vector is a subregister copy (sub_ymm) that needs no instruction.
817 def : Pat<(v4i64 (extract_subvector (v8i64 VR512:$src), (iPTR 0))),
818 (v4i64 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_ymm))>;
819 def : Pat<(v4f64 (extract_subvector (v8f64 VR512:$src), (iPTR 0))),
820 (v4f64 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_ymm))>;
821 def : Pat<(v8i32 (extract_subvector (v16i32 VR512:$src), (iPTR 0))),
822 (v8i32 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_ymm))>;
823 def : Pat<(v8f32 (extract_subvector (v16f32 VR512:$src), (iPTR 0))),
824 (v8f32 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_ymm))>;
825 def : Pat<(v16i16 (extract_subvector (v32i16 VR512:$src), (iPTR 0))),
826 (v16i16 (EXTRACT_SUBREG (v32i16 VR512:$src), sub_ymm))>;
827 def : Pat<(v32i8 (extract_subvector (v64i8 VR512:$src), (iPTR 0))),
828 (v32i8 (EXTRACT_SUBREG (v64i8 VR512:$src), sub_ymm))>;
// Inserting a subvector at index 0 of an undef 512-bit vector is selected as
// INSERT_SUBREG into an IMPLICIT_DEF register rather than a real instruction.
830 let AddedComplexity = 25 in { // to give priority over vinsertf128rm
831 // A 128-bit subvector insert into the lowest position (index 0) of an undef
832 // 512-bit vector is a subregister copy (sub_xmm) that needs no instruction.
833 def : Pat<(v8i64 (insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0))),
834 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
835 def : Pat<(v8f64 (insert_subvector undef, (v2f64 VR128X:$src), (iPTR 0))),
836 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
837 def : Pat<(v16i32 (insert_subvector undef, (v4i32 VR128X:$src), (iPTR 0))),
838 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
839 def : Pat<(v16f32 (insert_subvector undef, (v4f32 VR128X:$src), (iPTR 0))),
840 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
841 def : Pat<(v32i16 (insert_subvector undef, (v8i16 VR128X:$src), (iPTR 0))),
842 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
843 def : Pat<(v64i8 (insert_subvector undef, (v16i8 VR128X:$src), (iPTR 0))),
844 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)>;
846 // A 256-bit subvector insert into the lowest position (index 0) of an undef
847 // 512-bit vector is a subregister copy (sub_ymm) that needs no instruction.
848 def : Pat<(v8i64 (insert_subvector undef, (v4i64 VR256X:$src), (iPTR 0))),
849 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
850 def : Pat<(v8f64 (insert_subvector undef, (v4f64 VR256X:$src), (iPTR 0))),
851 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
852 def : Pat<(v16i32 (insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0))),
853 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
854 def : Pat<(v16f32 (insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0))),
855 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
856 def : Pat<(v32i16 (insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0))),
857 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
858 def : Pat<(v64i8 (insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0))),
859 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
862 // vextractps - extract 32 bits from XMM
// Register-destination form: the v4f32 source is bitcast to v4i32 and the
// element selected by the 8-bit immediate $src2 is written to a GR32.
863 def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
864 (ins VR128X:$src1, u8imm:$src2),
865 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
866 [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
// Memory-destination form: the same element extract, stored to the f32mem
// operand $dst.
869 def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
870 (ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
871 "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
872 [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
873 addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>;
875 //===---------------------------------------------------------------------===//
878 // broadcast with a scalar argument.
// Patterns that select the register-form broadcast (NAME#ZSuffix#r/rk/rkz)
// when the broadcast source is a scalar in SrcInfo.FRC; the scalar is first
// copied into the vector register class SrcInfo.RC with COPY_TO_REGCLASS.
879 multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
880 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
// Unmasked broadcast.
881 def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
882 (!cast<Instruction>(NAME#DestInfo.ZSuffix#r)
883 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
// Masked broadcast selected to the rk form, which also receives $src0.
884 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
885 (X86VBroadcast SrcInfo.FRC:$src),
887 (!cast<Instruction>(NAME#DestInfo.ZSuffix#rk)
888 DestInfo.RC:$src0, DestInfo.KRCWM:$mask,
889 (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
// Masked broadcast whose false operand is all zeros, selected to the rkz form.
890 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
891 (X86VBroadcast SrcInfo.FRC:$src),
892 DestInfo.ImmAllZerosV)),
893 (!cast<Instruction>(NAME#DestInfo.ZSuffix#rkz)
894 DestInfo.KRCWM:$mask, (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC))>;
// Defines the register-source (r) and memory-source (m) broadcast
// instructions through AVX512_maskable, plus extra patterns that fold a
// scalar load wrapped in scalar_to_vector into the m/mk/mkz forms.
//   opc       - opcode byte passed to AVX512_maskable.
//   OpcodeStr - assembly mnemonic.
//   DestInfo  - type info of the broadcast result.
//   SrcInfo   - type info of the source vector / scalar memory operand.
897 multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
898 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
899 let ExeDomain = DestInfo.ExeDomain in {
900 defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
901 (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
902 (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
904 defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
905 (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
906 (DestInfo.VT (X86VBroadcast
907 (SrcInfo.ScalarLdFrag addr:$src)))>,
908 T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
// Unmasked broadcast of a scalar_to_vector'd load -> m form.
911 def : Pat<(DestInfo.VT (X86VBroadcast
912 (SrcInfo.VT (scalar_to_vector
913 (SrcInfo.ScalarLdFrag addr:$src))))),
914 (!cast<Instruction>(NAME#DestInfo.ZSuffix#m) addr:$src)>;
// Masked variant selected to the mk form, which also receives $src0.
915 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
917 (SrcInfo.VT (scalar_to_vector
918 (SrcInfo.ScalarLdFrag addr:$src)))),
920 (!cast<Instruction>(NAME#DestInfo.ZSuffix#mk)
921 DestInfo.RC:$src0, DestInfo.KRCWM:$mask, addr:$src)>;
// Masked variant whose false operand is all zeros -> mkz form.
922 def : Pat<(DestInfo.VT (vselect DestInfo.KRCWM:$mask,
924 (SrcInfo.VT (scalar_to_vector
925 (SrcInfo.ScalarLdFrag addr:$src)))),
926 DestInfo.ImmAllZerosV)),
927 (!cast<Instruction>(NAME#DestInfo.ZSuffix#mkz)
928 DestInfo.KRCWM:$mask, addr:$src)>;
// FP broadcast wrapper used for VBROADCASTSD: instantiates the instruction
// definitions and the scalar-source patterns for the 512-bit result (Z, under
// HasAVX512) and the 256-bit result (Z256, under HasVLX). The source type
// info is always _.info128.
931 multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
932 AVX512VLVectorVTInfo _> {
933 let Predicates = [HasAVX512] in
934 defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
935 avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
938 let Predicates = [HasVLX] in {
939 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
940 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
// FP broadcast wrapper used for VBROADCASTSS: like avx512_fp_broadcast_sd but
// it also instantiates a 128-bit result form (Z128) next to Z and Z256. The
// source type info is always _.info128.
945 multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
946 AVX512VLVectorVTInfo _> {
947 let Predicates = [HasAVX512] in
948 defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
949 avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
952 let Predicates = [HasVLX] in {
953 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
954 avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
956 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
957 avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
// Scalar FP broadcast instructions: vbroadcastss (opcode 0x18) and
// vbroadcastsd (opcode 0x19, VEX_W).
961 defm VBROADCASTSS : avx512_fp_broadcast_ss<0x18, "vbroadcastss",
963 defm VBROADCASTSD : avx512_fp_broadcast_sd<0x19, "vbroadcastsd",
964 avx512vl_f64_info>, VEX_W;
// The 512-bit broadcast intrinsics taking an address select the
// memory-source (m) forms directly.
966 def : Pat<(int_x86_avx512_vbroadcast_ss_512 addr:$src),
967 (VBROADCASTSSZm addr:$src)>;
968 def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
969 (VBROADCASTSDZm addr:$src)>;
// Integer broadcast from a register of class SrcRC. Defines the maskable
// "r" instruction whose mnemonic is "vpbroadcast" followed by _.Suffix and
// whose pattern applies OpNode to the SrcRC source.
//   opc    - opcode byte.
//   _      - type info of the destination vector.
//   OpNode - pattern operator applied to the source register.
//   SrcRC  - register class of the source operand.
971 multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
972 SDPatternOperator OpNode,
973 RegisterClass SrcRC> {
974 let ExeDomain = _.ExeDomain in
975 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
977 "vpbroadcast"##_.Suffix, "$src", "$src",
978 (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
// Instantiates avx512_int_broadcast_reg for all three vector lengths: the
// 512-bit form (Z) requires only prd, while the 256-bit (Z256) and 128-bit
// (Z128) forms require prd together with HasVLX.
981 multiclass avx512_int_broadcast_reg_vl<bits<8> opc, AVX512VLVectorVTInfo _,
982 SDPatternOperator OpNode,
983 RegisterClass SrcRC, Predicate prd> {
984 let Predicates = [prd] in
985 defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
986 let Predicates = [prd, HasVLX] in {
987 defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
988 defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
// Byte/word broadcasts from GR8/GR16 carry the X86VBroadcast selection
// pattern and are marked isCodeGenOnly.
992 let isCodeGenOnly = 1 in {
993 defm VPBROADCASTBr : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
994 X86VBroadcast, GR8, HasBWI>;
995 defm VPBROADCASTWr : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
996 X86VBroadcast, GR16, HasBWI>;
// The _Alt variants use the same opcodes with a GR32 source, have no
// selection pattern (null_frag) and are marked isAsmParserOnly.
998 let isAsmParserOnly = 1 in {
999 defm VPBROADCASTBr_Alt : avx512_int_broadcast_reg_vl<0x7A, avx512vl_i8_info,
1000 null_frag, GR32, HasBWI>;
1001 defm VPBROADCASTWr_Alt : avx512_int_broadcast_reg_vl<0x7B, avx512vl_i16_info,
1002 null_frag, GR32, HasBWI>;
// Dword/qword broadcasts from GR32/GR64 share opcode 0x7C; the qword form
// adds VEX_W.
1004 defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
1005 X86VBroadcast, GR32, HasAVX512>;
1006 defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
1007 X86VBroadcast, GR64, HasAVX512>, VEX_W;
// Zero-extending a mask register to a vector is selected as a zero-masked
// (rkz) broadcast of the constant 1 materialized with MOV32ri / MOV64ri.
1009 def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
1010 (VPBROADCASTDrZrkz VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
1011 def : Pat <(v8i64 (X86vzext VK8WM:$mask)),
1012 (VPBROADCASTQrZrkz VK8WM:$mask, (i64 (MOV64ri 0x1)))>;
1014 // Provide patterns for a broadcast whose source is a full vector register
1015 // of type SrcInfo: the low xmm subregister is extracted automatically
// (EXTRACT_SUBREG ..., sub_xmm) and fed to the NAME#ZSuffix#"r" instruction.
1016 multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
1017 X86VectorVTInfo SrcInfo> {
1018 def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
1019 (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
1020 (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
// Integer broadcast from an xmm register or memory for all vector lengths.
// The 512-bit form (Z) requires prd; the 256-bit (Z256) and 128-bit (Z128)
// forms require prd together with HasVLX. Extra lowering patterns accept a
// wider vector register as the broadcast source.
1023 multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
1024 AVX512VLVectorVTInfo _, Predicate prd> {
1025 let Predicates = [prd] in {
1026 defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
1027 avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
1029 // Defined separately to avoid redefinition.
1030 defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
1032 let Predicates = [prd, HasVLX] in {
1033 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
1034 avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
1036 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
// Integer element broadcasts. The byte and word forms are predicated on
// HasBWI; the dword and qword forms on HasAVX512 (qword adds VEX_W).
1041 defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
1042 avx512vl_i8_info, HasBWI>;
1043 defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
1044 avx512vl_i16_info, HasBWI>;
1045 defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
1046 avx512vl_i32_info, HasAVX512>;
1047 defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
1048 avx512vl_i64_info, HasAVX512>, VEX_W;
// Subvector broadcast from memory: defines the maskable "rm" instruction
// that loads a _Src-typed vector (through bitconvert of _Src.LdFrag) and
// applies X86SubVBroadcast to produce a _Dst-typed result.
1050 multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
1051 X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
1052 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
1053 (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
1054 (_Dst.VT (X86SubVBroadcast
1055 (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
// Fold a zero-extending vector load (X86vzload) used as the source of a
// v8i64 broadcast into the memory form of vpbroadcastq.
1059 let Predicates = [HasAVX512] in {
1060 // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1061 def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
1062 (VPBROADCASTQZm addr:$src)>;
// Fold a zero-extending vector load (X86vzload) used as the source of a
// 128/256-bit qword broadcast into the memory form of vpbroadcastq.
// VPBROADCASTQ is instantiated with HasAVX512 as its base predicate, so its
// Z128/Z256 forms only require HasVLX; gating these patterns on HasBWI as
// well would needlessly disable them on VLX targets without BWI.
1065 let Predicates = [HasVLX] in {
1066 // 32-bit targets will fail to load an i64 directly but can use ZEXT_LOAD.
1067 def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))),
1068 (VPBROADCASTQZ128m addr:$src)>;
1069 def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))),
1070 (VPBROADCASTQZ256m addr:$src)>;
}
// The word broadcasts below select VPBROADCASTW, which is instantiated with
// HasBWI as its base predicate, so they need both HasVLX and HasBWI.
let Predicates = [HasVLX, HasBWI] in {
1071 // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
1072 // This means we'll encounter truncated i32 loads; match that here.
1073 def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1074 (VPBROADCASTWZ128m addr:$src)>;
1075 def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
1076 (VPBROADCASTWZ256m addr:$src)>;
1077 def : Pat<(v8i16 (X86VBroadcast
1078 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1079 (VPBROADCASTWZ128m addr:$src)>;
1080 def : Pat<(v16i16 (X86VBroadcast
1081 (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
1082 (VPBROADCASTWZ256m addr:$src)>;
1085 //===----------------------------------------------------------------------===//
1086 // AVX-512 BROADCAST SUBVECTORS
// 512-bit destination subvector broadcasts from memory: 32x4 forms broadcast
// a 128-bit source (v4i32x/v4f32x), 64x4 forms broadcast a 256-bit source
// (v4i64x/v4f64x) and add VEX_W.
1089 defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1090 v16i32_info, v4i32x_info>,
1091 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1092 defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1093 v16f32_info, v4f32x_info>,
1094 EVEX_V512, EVEX_CD8<32, CD8VT4>;
1095 defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
1096 v8i64_info, v4i64x_info>, VEX_W,
1097 EVEX_V512, EVEX_CD8<64, CD8VT4>;
1098 defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
1099 v8f64_info, v4f64x_info>, VEX_W,
1100 EVEX_V512, EVEX_CD8<64, CD8VT4>;
// Additional 512-bit subvector-broadcast patterns available with plain
// AVX512: i16/i8 element types reuse the I64X4 / I32X4 memory forms, and
// register sources are handled with an insert of the upper half.
1102 let Predicates = [HasAVX512] in {
// 256-bit loads broadcast as v32i16 / v64i8 use vbroadcasti64x4.
1103 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
1104 (VBROADCASTI64X4rm addr:$src)>;
1105 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
1106 (VBROADCASTI64X4rm addr:$src)>;
1108 // Provide fallback in case the load node that is used in the patterns above
1109 // is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with VINSERT*64x4.
1110 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
1111 (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1112 (v4f64 VR256X:$src), 1)>;
1113 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
1114 (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1115 (v4i64 VR256X:$src), 1)>;
1116 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
1117 (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1118 (v16i16 VR256X:$src), 1)>;
1119 def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
1120 (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1121 (v32i8 VR256X:$src), 1)>;
// 128-bit loads broadcast as v32i16 / v64i8 use vbroadcasti32x4.
1123 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1124 (VBROADCASTI32X4rm addr:$src)>;
1125 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1126 (VBROADCASTI32X4rm addr:$src)>;
// 256-bit destination subvector broadcasts of a 128-bit source (VLX).
1129 let Predicates = [HasVLX] in {
1130 defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
1131 v8i32x_info, v4i32x_info>,
1132 EVEX_V256, EVEX_CD8<32, CD8VT4>;
1133 defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
1134 v8f32x_info, v4f32x_info>,
1135 EVEX_V256, EVEX_CD8<32, CD8VT4>;
// i16 / i8 element types reuse the vbroadcasti32x4 memory form.
1137 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
1138 (VBROADCASTI32X4Z256rm addr:$src)>;
1139 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
1140 (VBROADCASTI32X4Z256rm addr:$src)>;
1142 // Provide fallback in case the load node that is used in the patterns above
1143 // is used by additional users, which prevents the pattern selection.
// The register source is placed in the low half via INSERT_SUBREG and then
// inserted again at index 1 with VINSERT*32x4Z256.
1144 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
1145 (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1146 (v4f32 VR128X:$src), 1)>;
1147 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
1148 (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1149 (v4i32 VR128X:$src), 1)>;
1150 def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
1151 (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1152 (v8i16 VR128X:$src), 1)>;
1153 def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
1154 (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1155 (v16i8 VR128X:$src), 1)>;
// 64x2 subvector broadcasts into a 256-bit destination (VLX + DQI).
// NOTE(review): the records are named ...Z128 although they are encoded with
// EVEX_V256 and produce v4i64/v4f64 results; the names are kept as-is.
1158 let Predicates = [HasVLX, HasDQI] in {
1159 defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
1160 v4i64x_info, v2i64x_info>, VEX_W,
1161 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1162 defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
1163 v4f64x_info, v2f64x_info>, VEX_W,
1164 EVEX_V256, EVEX_CD8<64, CD8VT2>;
1166 // Provide fallback in case the load node that is used in the patterns above
1167 // is used by additional users, which prevents the pattern selection.
1168 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1169 (VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1170 (v2f64 VR128X:$src), 1)>;
1171 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1172 (VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1173 (v2i64 VR128X:$src), 1)>;
// Without DQI, 256-bit broadcasts of a 128-bit v2f64/v2i64 subvector are
// selected to the 32x4 instructions instead of the 64x2 ones.
1176 let Predicates = [HasVLX, NoDQI] in {
1177 def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1178 (VBROADCASTF32X4Z256rm addr:$src)>;
1179 def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1180 (VBROADCASTI32X4Z256rm addr:$src)>;
1182 // Provide fallback in case the load node that is used in the patterns above
1183 // is used by additional users, which prevents the pattern selection.
1184 def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
1185 (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1186 (v2f64 VR128X:$src), 1)>;
1187 def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
1188 (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
1189 (v2i64 VR128X:$src), 1)>;
// Without DQI, 512-bit subvector broadcasts of the "other" element width are
// selected to the instructions that plain AVX512 provides: 128-bit v2f64 /
// v2i64 sources use the 32X4 forms, 256-bit v8f32 / v8i32 sources use the
// 64X4 forms.
1192 let Predicates = [HasAVX512, NoDQI] in {
1193 def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
1194 (VBROADCASTF32X4rm addr:$src)>;
1195 def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
1196 (VBROADCASTI32X4rm addr:$src)>;
1198 def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
1199 (VBROADCASTF64X4rm addr:$src)>;
1200 def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
1201 (VBROADCASTI64X4rm addr:$src)>;
1203 // Provide fallback in case the load node that is used in the patterns above
1204 // is used by additional users, which prevents the pattern selection.
1205 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1206 (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1207 (v8f32 VR256X:$src), 1)>;
1208 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1209 (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1210 (v8i32 VR256X:$src), 1)>;
// 512-bit destination subvector broadcasts that require DQI: 64x2 forms
// broadcast a 128-bit source (VEX_W), 32x8 forms broadcast a 256-bit source.
1213 let Predicates = [HasDQI] in {
1214 defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
1215 v8i64_info, v2i64x_info>, VEX_W,
1216 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1217 defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
1218 v16i32_info, v8i32x_info>,
1219 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1220 defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
1221 v8f64_info, v2f64x_info>, VEX_W,
1222 EVEX_V512, EVEX_CD8<64, CD8VT2>;
1223 defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
1224 v16f32_info, v8f32x_info>,
1225 EVEX_V512, EVEX_CD8<32, CD8VT8>;
1227 // Provide fallback in case the load node that is used in the patterns above
1228 // is used by additional users, which prevents the pattern selection.
1229 def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
1230 (VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1231 (v8f32 VR256X:$src), 1)>;
1232 def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
1233 (VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
1234 (v8i32 VR256X:$src), 1)>;
// 32x2 broadcast for the 512-bit (Z, HasDQI) and 256-bit (Z256, HasDQI +
// HasVLX) destinations. The source type info is always _Src.info128.
1237 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
1238 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
1239 let Predicates = [HasDQI] in
1240 defm Z : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>,
1242 let Predicates = [HasDQI, HasVLX] in
1243 defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>,
// Integer 32x2 broadcast: inherits the Z and Z256 forms from
// avx512_common_broadcast_32x2 and adds a 128-bit destination form (Z128)
// under HasDQI + HasVLX.
1247 multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
1248 AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
1249 avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
1251 let Predicates = [HasDQI, HasVLX] in
1252 defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>,
// The integer form uses the i32x2 multiclass (which includes a Z128 form);
// the FP form uses the common multiclass (Z and Z256 only). In both cases
// the destination info is the 32-bit element family and the source info is
// the 64-bit element family.
1256 defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
1257 avx512vl_i32_info, avx512vl_i64_info>;
1258 defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
1259 avx512vl_f32_info, avx512vl_f64_info>;
// FP broadcasts whose source is a 256-bit or 512-bit vector register: the
// low xmm subregister is extracted (EXTRACT_SUBREG ..., sub_xmm) and passed
// to the register form of vbroadcastss / vbroadcastsd.
1261 let Predicates = [HasVLX] in {
1262 def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256X:$src))),
1263 (VBROADCASTSSZ256r (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1264 def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256X:$src))),
1265 (VBROADCASTSDZ256r (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1268 def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
1269 (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
1270 def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
1271 (VBROADCASTSSZr (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm))>;
1273 def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
1274 (VBROADCASTSDZr (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm))>;
1275 def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
1276 (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
1278 //===----------------------------------------------------------------------===//
1279 // AVX-512 BROADCAST MASK TO VECTOR REGISTER
// Defines the "rr" instruction that broadcasts a mask register of class KRC
// into a vector register of type _.VT via X86VBroadcastm.
1281 multiclass avx512_mask_broadcastm<bits<8> opc, string OpcodeStr,
1282 X86VectorVTInfo _, RegisterClass KRC> {
1283 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
1284 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
1285 [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
// Instantiates avx512_mask_broadcastm for all three vector lengths: Z needs
// HasCDI, Z256 and Z128 need HasCDI together with HasVLX.
1288 multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
1289 AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> {
1290 let Predicates = [HasCDI] in
1291 defm Z : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info512, KRC>, EVEX_V512;
1292 let Predicates = [HasCDI, HasVLX] in {
1293 defm Z256 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info256, KRC>, EVEX_V256;
1294 defm Z128 : avx512_mask_broadcastm<opc, OpcodeStr, VTInfo.info128, KRC>, EVEX_V128;
// Mask-to-vector broadcasts: mw2d reads a VK16 mask into i32-element
// vectors, mb2q reads a VK8 mask into i64-element vectors (VEX_W).
1298 defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
1299 avx512vl_i32_info, VK16>;
1300 defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1301 avx512vl_i64_info, VK8>, VEX_W;
1303 //===----------------------------------------------------------------------===//
1304 // -- VPERMI2 - 3 source operands form --
// Defines the register (rr) and full-vector memory (rm) forms. $src1 is tied
// to $dst and is the first operand of the X86VPermi2X node.
1305 multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
1306 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1307 // The index operand in the pattern should really be an integer type. However,
1308 // if we do that and it happens to come from a bitcast, then it becomes
1309 // difficult to find the bitcast needed to convert the index to the
1310 // destination type for the passthru since it will be folded with the bitcast
1311 // of the index operand.
1312 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1313 (ins _.RC:$src2, _.RC:$src3),
1314 OpcodeStr, "$src3, $src2", "$src2, $src3",
1315 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, EVEX_4V,
1318 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1319 (ins _.RC:$src2, _.MemOp:$src3),
1320 OpcodeStr, "$src3, $src2", "$src2, $src3",
1321 (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
1322 (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1323 EVEX_4V, AVX5128IBase;
// Broadcast-memory (rmb) form of VPERMI2: the third source is a scalar
// memory operand that is broadcast (X86VBroadcast of a scalar load) and the
// instruction is marked EVEX_B. The assembly string appends _.BroadcastStr
// to the memory operand.
1326 multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1327 X86VectorVTInfo _> {
1328 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1329 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1330 (ins _.RC:$src2, _.ScalarMemOp:$src3),
1331 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1332 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1333 (_.VT (X86VPermi2X _.RC:$src1,
1334 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1335 1>, AVX5128IBase, EVEX_4V, EVEX_B;
// VPERMI2 for all three vector lengths, including the broadcast-memory form.
// The 512-bit records use NAME directly; the 128/256-bit records append the
// width to NAME and are predicated on HasVLX.
1338 multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
1339 AVX512VLVectorVTInfo VTInfo> {
1340 defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
1341 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
1342 let Predicates = [HasVLX] in {
1343 defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
1344 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
1345 defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
1346 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
// VPERMI2 for all three vector lengths without a broadcast-memory form
// (only avx512_perm_i is instantiated). The 512-bit form requires Prd; the
// 128/256-bit forms require Prd together with HasVLX.
1350 multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
1351 AVX512VLVectorVTInfo VTInfo,
1353 let Predicates = [Prd] in
1354 defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
1355 let Predicates = [Prd, HasVLX] in {
1356 defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
1357 defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
// VPERMI2 instantiations. D/Q and PS/PD use avx512_perm_i_sizes (with the
// broadcast-memory form); W and B use avx512_perm_i_sizes_bw and are
// predicated on HasBWI and HasVBMI respectively.
1361 defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
1362 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1363 defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
1364 avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1365 defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
1366 avx512vl_i16_info, HasBWI>,
1367 VEX_W, EVEX_CD8<16, CD8VF>;
1368 defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
1369 avx512vl_i8_info, HasVBMI>,
1371 defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
1372 avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
1373 defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
1374 avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2 register (rr) and full-vector memory (rm) forms. $src1 is tied to
// $dst; $src2 lives in IdxVT's register class, which lets the FP variants
// pass a separate integer type info for that operand.
//   _     - type info of the data vectors ($src1/$dst and $src3).
//   IdxVT - type info supplying the register class of $src2.
1377 multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
1378 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1379 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1380 defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
1381 (ins IdxVT.RC:$src2, _.RC:$src3),
1382 OpcodeStr, "$src3, $src2", "$src2, $src3",
1383 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
1384 EVEX_4V, AVX5128IBase;
1386 defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1387 (ins IdxVT.RC:$src2, _.MemOp:$src3),
1388 OpcodeStr, "$src3, $src2", "$src2, $src3",
1389 (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
1390 (bitconvert (_.LdFrag addr:$src3)))), 1>,
1391 EVEX_4V, AVX5128IBase;
// Broadcast-memory (rmb) form of VPERMT2: the third source is a scalar
// memory operand that is broadcast (X86VBroadcast of a scalar load) and the
// instruction is marked EVEX_B. The assembly string appends _.BroadcastStr
// to the memory operand.
1394 multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
1395 X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1396 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1397 defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
1398 (ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
1399 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1400 !strconcat("$src2, ${src3}", _.BroadcastStr ),
1401 (_.VT (X86VPermt2 _.RC:$src1,
1402 IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
1403 1>, AVX5128IBase, EVEX_4V, EVEX_B;
// VPERMT2 for all three vector lengths, including the broadcast-memory form.
// VTInfo describes the data vectors and ShuffleMask supplies the type info
// for the $src2 operand. The 128/256-bit records are predicated on HasVLX.
1406 multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
1407 AVX512VLVectorVTInfo VTInfo,
1408 AVX512VLVectorVTInfo ShuffleMask> {
1409 defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
1410 ShuffleMask.info512>,
1411 avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info512,
1412 ShuffleMask.info512>, EVEX_V512;
1413 let Predicates = [HasVLX] in {
1414 defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
1415 ShuffleMask.info128>,
1416 avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info128,
1417 ShuffleMask.info128>, EVEX_V128;
1418 defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
1419 ShuffleMask.info256>,
1420 avx512_perm_t_mb<opc, OpcodeStr, VTInfo.info256,
1421 ShuffleMask.info256>, EVEX_V256;
// VPERMT2 for all three vector lengths without a broadcast-memory form
// (only avx512_perm_t is instantiated). The 512-bit form requires Prd; the
// 128/256-bit forms require Prd together with HasVLX.
1425 multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
1426 AVX512VLVectorVTInfo VTInfo,
1427 AVX512VLVectorVTInfo Idx,
1429 let Predicates = [Prd] in
1430 defm NAME: avx512_perm_t<opc, OpcodeStr, VTInfo.info512,
1431 Idx.info512>, EVEX_V512;
1432 let Predicates = [Prd, HasVLX] in {
1433 defm NAME#128: avx512_perm_t<opc, OpcodeStr, VTInfo.info128,
1434 Idx.info128>, EVEX_V128;
1435 defm NAME#256: avx512_perm_t<opc, OpcodeStr, VTInfo.info256,
1436 Idx.info256>, EVEX_V256;
// VPERMT2 instantiations. The integer forms pass the same type info for data
// and $src2; the PS/PD forms pair FP data info with the integer info of the
// same element width. W and B are predicated on HasBWI and HasVBMI.
1440 defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d",
1441 avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1442 defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q",
1443 avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1444 defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w",
1445 avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
1446 VEX_W, EVEX_CD8<16, CD8VF>;
1447 defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b",
1448 avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
1450 defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps",
1451 avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
1452 defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd",
1453 avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
1455 //===----------------------------------------------------------------------===//
1456 // AVX-512 - BLEND using mask
// Mask-blend instruction definitions with register (rr*) and full-vector
// memory (rm*) second sources, each in unmasked, merge-masked (k, EVEX_K) and
// zero-masked (kz, EVEX_KZ) variants. The masked and memory definitions
// visible here carry an empty pattern list ([]).
1458 multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
1459 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
1460 def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1461 (ins _.RC:$src1, _.RC:$src2),
1462 !strconcat(OpcodeStr,
1463 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1465 def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1466 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1467 !strconcat(OpcodeStr,
1468 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1469 []>, EVEX_4V, EVEX_K;
1470 def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
1471 (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1472 !strconcat(OpcodeStr,
1473 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1474 []>, EVEX_4V, EVEX_KZ;
// Memory-source variants; mayLoad is set explicitly because the definitions
// have no pattern from which it could be inferred.
1475 let mayLoad = 1 in {
1476 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1477 (ins _.RC:$src1, _.MemOp:$src2),
1478 !strconcat(OpcodeStr,
1479 "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
1480 []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
1481 def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1482 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1483 !strconcat(OpcodeStr,
1484 "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
1485 []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>;
1486 def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1487 (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1488 !strconcat(OpcodeStr,
1489 "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
1490 []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>;
// avx512_blendmask_rmb - Adds the embedded-broadcast memory forms of a
// mask-blend instruction for vector type `_`:
//   rmbk : write-masked (EVEX_K), $src2 is a scalar memory operand
//   rmb  : unmasked, $src2 is a scalar memory operand
// Both set EVEX_B, print $src2 followed by _.BroadcastStr, have empty
// patterns, and are marked mayLoad = 1 / hasSideEffects = 0.
1494 multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
1496 let mayLoad = 1, hasSideEffects = 0 in {
1497 def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1498 (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2),
1499 !strconcat(OpcodeStr,
1500 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1501 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1502 []>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
1504 def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
1505 (ins _.RC:$src1, _.ScalarMemOp:$src2),
1506 !strconcat(OpcodeStr,
1507 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
1508 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1509 []>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
// blendmask_dq - Instantiates both avx512_blendmask and avx512_blendmask_rmb
// (i.e. including the broadcast forms) for each vector width in VTInfo:
// Z uses info512 with EVEX_V512; Z256 and Z128 use info256/info128 and are
// additionally guarded by the HasVLX predicate.
1513 multiclass blendmask_dq <bits<8> opc, string OpcodeStr,
1514 AVX512VLVectorVTInfo VTInfo> {
1515 defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>,
1516 avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
1518 let Predicates = [HasVLX] in {
1519 defm Z256 : avx512_blendmask<opc, OpcodeStr, VTInfo.info256>,
1520 avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
1521 defm Z128 : avx512_blendmask<opc, OpcodeStr, VTInfo.info128>,
1522 avx512_blendmask_rmb <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
// blendmask_bw - Instantiates avx512_blendmask only (no broadcast forms are
// added here, unlike blendmask_dq) for each vector width in VTInfo.
// The 512-bit Z variant requires HasBWI; the Z256 and Z128 variants require
// both HasBWI and HasVLX.
1526 multiclass blendmask_bw <bits<8> opc, string OpcodeStr,
1527 AVX512VLVectorVTInfo VTInfo> {
1528 let Predicates = [HasBWI] in
1529 defm Z : avx512_blendmask <opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
1531 let Predicates = [HasBWI, HasVLX] in {
1532 defm Z256 : avx512_blendmask <opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
1533 defm Z128 : avx512_blendmask <opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
// Mask-blend instantiations. The PS/PD and D/Q forms go through blendmask_dq
// (opcodes 0x65 and 0x64); the B/W forms go through blendmask_bw (opcode
// 0x66). The PD, Q and W forms additionally set VEX_W.
1538 defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", avx512vl_f32_info>;
1539 defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", avx512vl_f64_info>, VEX_W;
1540 defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", avx512vl_i32_info>;
1541 defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", avx512vl_i64_info>, VEX_W;
1542 defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", avx512vl_i8_info>;
1543 defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", avx512vl_i16_info>, VEX_W;
1546 //===----------------------------------------------------------------------===//
1547 // Compare Instructions
1548 //===----------------------------------------------------------------------===//
1550 // avx512_cmp_scalar - AVX512 CMPSS and CMPSD
// Scalar floating-point compare (opcode 0xC2) for the scalar type info `_`.
// Three groups of definitions are produced:
//   * rr_Int / rm_Int / rrb_Int: built with AVX512_maskable_cmp on _.RC
//     operands with an AVXCC condition-code operand; rrb_Int is the {sae}
//     form (EVEX_B) and matches OpNodeRnd with FROUND_NO_EXC.
//   * rri_alt / rmi_alt / rrb_alt: asm-parser-only forms that take the
//     comparison as an explicit u8imm instead of a condition-code mnemonic.
//   * rr / rm: codegen-only forms on _.FRC operands that write a _.KRC
//     result; rr is marked commutable.
1552 multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>{
1554 defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1556 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1557 "vcmp${cc}"#_.Suffix,
1558 "$src2, $src1", "$src1, $src2",
1559 (OpNode (_.VT _.RC:$src1),
1563 defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
1565 (ins _.RC:$src1, _.IntScalarMemOp:$src2, AVXCC:$cc),
1566 "vcmp${cc}"#_.Suffix,
1567 "$src2, $src1", "$src1, $src2",
1568 (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
1569 imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
1571 defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
1573 (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
1574 "vcmp${cc}"#_.Suffix,
1575 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
1576 (OpNodeRnd (_.VT _.RC:$src1),
1579 (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B;
1580 // Accept explicit immediate argument form instead of comparison code.
1581 let isAsmParserOnly = 1, hasSideEffects = 0 in {
1582 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1584 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1586 "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V;
1588 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
1590 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
1592 "$cc, $src2, $src1", "$src1, $src2, $cc">,
1593 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
1595 defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
1597 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
1599 "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
1601 }// let isAsmParserOnly = 1, hasSideEffects = 0
1603 let isCodeGenOnly = 1 in {
1604 let isCommutable = 1 in
1605 def rr : AVX512Ii8<0xC2, MRMSrcReg,
1606 (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
1607 !strconcat("vcmp${cc}", _.Suffix,
1608 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1609 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1612 IIC_SSE_ALU_F32S_RR>, EVEX_4V;
1613 def rm : AVX512Ii8<0xC2, MRMSrcMem,
1615 (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
1616 !strconcat("vcmp${cc}", _.Suffix,
1617 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1618 [(set _.KRC:$dst, (OpNode _.FRC:$src1,
1619 (_.ScalarLdFrag addr:$src2),
1621 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
// Scalar compare instantiations under HasAVX512: VCMPSSZ for f32x_info in the
// SSEPackedSingle domain and VCMPSDZ for f64x_info in the SSEPackedDouble
// domain. Both use X86cmpms / X86cmpmsRnd as the plain and rounding nodes.
1625 let Predicates = [HasAVX512] in {
1626 let ExeDomain = SSEPackedSingle in
1627 defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd>,
1629 let ExeDomain = SSEPackedDouble in
1630 defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd>,
1631 AVX512XDIi8Base, VEX_W;
// avx512_icmp_packed - Packed integer compare that writes its result to a
// mask register (_.KRC) for vector type `_`. Defines:
//   rr  : register/register, matches (OpNode src1, src2)
//   rm  : register/full-vector load
//   rrk : as rr, with the compare result ANDed with the $mask operand (EVEX_K)
//   rmk : as rm, with the compare result ANDed with the $mask operand (EVEX_K)
// IsCommutable is applied to the two register forms (rr, rrk) only.
1634 multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
1635 X86VectorVTInfo _, bit IsCommutable> {
1636 let isCommutable = IsCommutable in
1637 def rr : AVX512BI<opc, MRMSrcReg,
1638 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
1639 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1640 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))],
1641 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1642 def rm : AVX512BI<opc, MRMSrcMem,
1643 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2),
1644 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1645 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1646 (_.VT (bitconvert (_.LdFrag addr:$src2)))))],
1647 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1648 let isCommutable = IsCommutable in
1649 def rrk : AVX512BI<opc, MRMSrcReg,
1650 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
1651 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1652 "$dst {${mask}}, $src1, $src2}"),
1653 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1654 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))],
1655 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1656 def rmk : AVX512BI<opc, MRMSrcMem,
1657 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
1658 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|",
1659 "$dst {${mask}}, $src1, $src2}"),
1660 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1661 (OpNode (_.VT _.RC:$src1),
1663 (_.LdFrag addr:$src2))))))],
1664 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// avx512_icmp_packed_rmb - Inherits every form of avx512_icmp_packed and adds
// the embedded-broadcast memory forms (EVEX_B) whose $src2 is a scalar memory
// operand printed with _.BroadcastStr:
//   rmb  : unmasked, matches OpNode against an X86VBroadcast of a scalar load
//   rmbk : result ANDed with the $mask operand (EVEX_K)
1667 multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
1668 X86VectorVTInfo _, bit IsCommutable> :
1669 avx512_icmp_packed<opc, OpcodeStr, OpNode, _, IsCommutable> {
1670 def rmb : AVX512BI<opc, MRMSrcMem,
1671 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
1672 !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
1673 "|$dst, $src1, ${src2}", _.BroadcastStr, "}"),
1674 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1675 (X86VBroadcast (_.ScalarLdFrag addr:$src2))))],
1676 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
1677 def rmbk : AVX512BI<opc, MRMSrcMem,
1678 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
1679 _.ScalarMemOp:$src2),
1680 !strconcat(OpcodeStr,
1681 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
1682 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
1683 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1684 (OpNode (_.VT _.RC:$src1),
1686 (_.ScalarLdFrag addr:$src2)))))],
1687 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// avx512_icmp_packed_vl - Instantiates avx512_icmp_packed at every vector
// width in VTInfo. Z (info512, EVEX_V512) is guarded by `prd`; Z256 and Z128
// are guarded by both `prd` and HasVLX. IsCommutable defaults to 0 and is
// forwarded unchanged to each instantiation.
1690 multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
1691 AVX512VLVectorVTInfo VTInfo, Predicate prd,
1692 bit IsCommutable = 0> {
1693 let Predicates = [prd] in
1694 defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512,
1695 IsCommutable>, EVEX_V512;
1697 let Predicates = [prd, HasVLX] in {
1698 defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256,
1699 IsCommutable>, EVEX_V256;
1700 defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128,
1701 IsCommutable>, EVEX_V128;
// avx512_icmp_packed_rmb_vl - Same width fan-out as avx512_icmp_packed_vl,
// but instantiates avx512_icmp_packed_rmb so each width also gets the
// broadcast forms. Z is guarded by `prd`; Z256 and Z128 by `prd` and HasVLX.
1705 multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
1706 SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
1707 Predicate prd, bit IsCommutable = 0> {
1708 let Predicates = [prd] in
1709 defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
1710 IsCommutable>, EVEX_V512;
1712 let Predicates = [prd, HasVLX] in {
1713 defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
1714 IsCommutable>, EVEX_V256;
1715 defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
1716 IsCommutable>, EVEX_V128;
// Packed integer equal / greater-than compares into mask registers.
// The byte and word forms use avx512_icmp_packed_vl with HasBWI (no broadcast
// forms); the dword and qword forms use avx512_icmp_packed_rmb_vl with
// HasAVX512. The VPCMPEQ* forms pass IsCommutable = 1; the VPCMPGT* forms
// leave it at its default of 0.
1720 defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
1721 avx512vl_i8_info, HasBWI, 1>,
1724 defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
1725 avx512vl_i16_info, HasBWI, 1>,
1726 EVEX_CD8<16, CD8VF>;
1728 defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
1729 avx512vl_i32_info, HasAVX512, 1>,
1730 EVEX_CD8<32, CD8VF>;
1732 defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
1733 avx512vl_i64_info, HasAVX512, 1>,
1734 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
1736 defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
1737 avx512vl_i8_info, HasBWI>,
1740 defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm,
1741 avx512vl_i16_info, HasBWI>,
1742 EVEX_CD8<16, CD8VF>;
1744 defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm,
1745 avx512vl_i32_info, HasAVX512>,
1746 EVEX_CD8<32, CD8VF>;
1748 defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
1749 avx512vl_i64_info, HasAVX512>,
1750 T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_icmp_packed_lowering - Selection patterns, active under `Preds`, for
// a packed compare whose _.KVT result is inserted (insert_subvector) into an
// all-zeros mask vector of type NewInf.KVT. Each pattern selects the
// instruction named InstrStr plus a form suffix and wraps it in
// COPY_TO_REGCLASS. Four source shapes are covered, one per instruction form:
//   rr  : register/register compare
//   rm  : compare against a full-vector load
//   rrk : register/register compare ANDed with $mask
//   rmk : load compare ANDed with $mask
1753 multiclass avx512_icmp_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
1754 SDNode OpNode, string InstrStr,
1755 list<Predicate> Preds> {
1756 let Predicates = Preds in {
1757 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1758 (_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
1760 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rr) _.RC:$src1, _.RC:$src2),
1763 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1764 (_.KVT (OpNode (_.VT _.RC:$src1),
1765 (_.VT (bitconvert (_.LdFrag addr:$src2))))),
1767 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rm) _.RC:$src1, addr:$src2),
1770 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1771 (_.KVT (and _.KRCWM:$mask,
1772 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
1774 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrk) _.KRCWM:$mask,
1775 _.RC:$src1, _.RC:$src2),
1778 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1779 (_.KVT (and (_.KVT _.KRCWM:$mask),
1780 (_.KVT (OpNode (_.VT _.RC:$src1),
1782 (_.LdFrag addr:$src2))))))),
1784 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmk) _.KRCWM:$mask,
1785 _.RC:$src1, addr:$src2),
// avx512_icmp_packed_rmb_lowering - Inherits the four patterns of
// avx512_icmp_packed_lowering and adds two more, under the same `Preds`, for
// the embedded-broadcast forms: the unmasked pattern selects InstrStr#rmb and
// the $mask-ANDed pattern selects InstrStr#rmbk. As in the base multiclass,
// the compare result is inserted into an all-zeros NewInf.KVT vector and the
// selected instruction is wrapped in COPY_TO_REGCLASS.
1790 multiclass avx512_icmp_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
1791 SDNode OpNode, string InstrStr,
1792 list<Predicate> Preds>
1793 : avx512_icmp_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
1794 let Predicates = Preds in {
1795 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1796 (_.KVT (OpNode (_.VT _.RC:$src1),
1797 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
1799 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmb) _.RC:$src1, addr:$src2),
1802 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
1803 (_.KVT (and (_.KVT _.KRCWM:$mask),
1804 (_.KVT (OpNode (_.VT _.RC:$src1),
1806 (_.ScalarLdFrag addr:$src2)))))),
1808 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbk) _.KRCWM:$mask,
1809 _.RC:$src1, addr:$src2),
// Lowering-pattern instantiations for X86pcmpeqm. Each line pairs a source
// vector type with a destination mask type and names the VPCMPEQ* instruction
// to select. Going by the type-info names, every destination mask listed here
// has more elements than the source vector. The byte/word rows use the
// non-broadcast multiclass with [HasBWI(, HasVLX)]; the dword/qword rows use
// the broadcast-aware multiclass with [HasAVX512(, HasVLX)].
1815 defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpeqm,
1816 "VPCMPEQBZ128", [HasBWI, HasVLX]>;
1817 defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpeqm,
1818 "VPCMPEQBZ128", [HasBWI, HasVLX]>;
1820 defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpeqm,
1821 "VPCMPEQBZ256", [HasBWI, HasVLX]>;
1824 defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpeqm,
1825 "VPCMPEQWZ128", [HasBWI, HasVLX]>;
1826 defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpeqm,
1827 "VPCMPEQWZ128", [HasBWI, HasVLX]>;
1828 defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpeqm,
1829 "VPCMPEQWZ128", [HasBWI, HasVLX]>;
1831 defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpeqm,
1832 "VPCMPEQWZ256", [HasBWI, HasVLX]>;
1833 defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpeqm,
1834 "VPCMPEQWZ256", [HasBWI, HasVLX]>;
1836 defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpeqm,
1837 "VPCMPEQWZ", [HasBWI]>;
1840 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpeqm,
1841 "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
1842 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpeqm,
1843 "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
1844 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpeqm,
1845 "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
1846 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpeqm,
1847 "VPCMPEQDZ128", [HasAVX512, HasVLX]>;
1849 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpeqm,
1850 "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
1851 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpeqm,
1852 "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
1853 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpeqm,
1854 "VPCMPEQDZ256", [HasAVX512, HasVLX]>;
1856 defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpeqm,
1857 "VPCMPEQDZ", [HasAVX512]>;
1858 defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpeqm,
1859 "VPCMPEQDZ", [HasAVX512]>;
1862 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpeqm,
1863 "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
1864 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpeqm,
1865 "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
1866 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpeqm,
1867 "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
1868 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpeqm,
1869 "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
1870 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpeqm,
1871 "VPCMPEQQZ128", [HasAVX512, HasVLX]>;
1873 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpeqm,
1874 "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
1875 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpeqm,
1876 "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
1877 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpeqm,
1878 "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
1879 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpeqm,
1880 "VPCMPEQQZ256", [HasAVX512, HasVLX]>;
1882 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpeqm,
1883 "VPCMPEQQZ", [HasAVX512]>;
1884 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpeqm,
1885 "VPCMPEQQZ", [HasAVX512]>;
1886 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpeqm,
1887 "VPCMPEQQZ", [HasAVX512]>;
// Lowering-pattern instantiations for X86pcmpgtm. Each line pairs a source
// vector type with a destination mask type and names the VPCMPGT* instruction
// to select. The source-type / mask-type / predicate combinations are the
// same as in the X86pcmpeqm list; only the node and instruction name differ.
1890 defm : avx512_icmp_packed_lowering<v16i8x_info, v32i1_info, X86pcmpgtm,
1891 "VPCMPGTBZ128", [HasBWI, HasVLX]>;
1892 defm : avx512_icmp_packed_lowering<v16i8x_info, v64i1_info, X86pcmpgtm,
1893 "VPCMPGTBZ128", [HasBWI, HasVLX]>;
1895 defm : avx512_icmp_packed_lowering<v32i8x_info, v64i1_info, X86pcmpgtm,
1896 "VPCMPGTBZ256", [HasBWI, HasVLX]>;
1899 defm : avx512_icmp_packed_lowering<v8i16x_info, v16i1_info, X86pcmpgtm,
1900 "VPCMPGTWZ128", [HasBWI, HasVLX]>;
1901 defm : avx512_icmp_packed_lowering<v8i16x_info, v32i1_info, X86pcmpgtm,
1902 "VPCMPGTWZ128", [HasBWI, HasVLX]>;
1903 defm : avx512_icmp_packed_lowering<v8i16x_info, v64i1_info, X86pcmpgtm,
1904 "VPCMPGTWZ128", [HasBWI, HasVLX]>;
1906 defm : avx512_icmp_packed_lowering<v16i16x_info, v32i1_info, X86pcmpgtm,
1907 "VPCMPGTWZ256", [HasBWI, HasVLX]>;
1908 defm : avx512_icmp_packed_lowering<v16i16x_info, v64i1_info, X86pcmpgtm,
1909 "VPCMPGTWZ256", [HasBWI, HasVLX]>;
1911 defm : avx512_icmp_packed_lowering<v32i16_info, v64i1_info, X86pcmpgtm,
1912 "VPCMPGTWZ", [HasBWI]>;
1915 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v8i1_info, X86pcmpgtm,
1916 "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
1917 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v16i1_info, X86pcmpgtm,
1918 "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
1919 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v32i1_info, X86pcmpgtm,
1920 "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
1921 defm : avx512_icmp_packed_rmb_lowering<v4i32x_info, v64i1_info, X86pcmpgtm,
1922 "VPCMPGTDZ128", [HasAVX512, HasVLX]>;
1924 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v16i1_info, X86pcmpgtm,
1925 "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
1926 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v32i1_info, X86pcmpgtm,
1927 "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
1928 defm : avx512_icmp_packed_rmb_lowering<v8i32x_info, v64i1_info, X86pcmpgtm,
1929 "VPCMPGTDZ256", [HasAVX512, HasVLX]>;
1931 defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v32i1_info, X86pcmpgtm,
1932 "VPCMPGTDZ", [HasAVX512]>;
1933 defm : avx512_icmp_packed_rmb_lowering<v16i32_info, v64i1_info, X86pcmpgtm,
1934 "VPCMPGTDZ", [HasAVX512]>;
1937 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v4i1_info, X86pcmpgtm,
1938 "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
1939 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v8i1_info, X86pcmpgtm,
1940 "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
1941 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v16i1_info, X86pcmpgtm,
1942 "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
1943 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v32i1_info, X86pcmpgtm,
1944 "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
1945 defm : avx512_icmp_packed_rmb_lowering<v2i64x_info, v64i1_info, X86pcmpgtm,
1946 "VPCMPGTQZ128", [HasAVX512, HasVLX]>;
1948 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v8i1_info, X86pcmpgtm,
1949 "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
1950 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v16i1_info, X86pcmpgtm,
1951 "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
1952 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v32i1_info, X86pcmpgtm,
1953 "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
1954 defm : avx512_icmp_packed_rmb_lowering<v4i64x_info, v64i1_info, X86pcmpgtm,
1955 "VPCMPGTQZ256", [HasAVX512, HasVLX]>;
1957 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v16i1_info, X86pcmpgtm,
1958 "VPCMPGTQZ", [HasAVX512]>;
1959 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v32i1_info, X86pcmpgtm,
1960 "VPCMPGTQZ", [HasAVX512]>;
1961 defm : avx512_icmp_packed_rmb_lowering<v8i64_info, v64i1_info, X86pcmpgtm,
1962 "VPCMPGTQZ", [HasAVX512]>;
// avx512_icmp_cc - Packed integer compare with a condition-code operand,
// writing a mask register (_.KRC) for vector type `_`. The mnemonic is built
// as "vpcmp${cc}" followed by Suffix. Defines:
//   rri  / rmi  : register and full-vector-load forms, AVX512ICC:$cc operand
//   rrik / rmik : the same with the result ANDed with $mask (EVEX_K)
//   *_alt       : asm-parser-only forms with empty patterns that take the
//                 comparison as an explicit u8imm and print plain "vpcmp"
// The register forms rri and rrik are marked isCommutable = 1.
// NOTE(review): swapping operands of a non-symmetric comparison also requires
// adjusting $cc; presumably that is handled outside this file -- confirm.
1964 multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
1965 X86VectorVTInfo _> {
1966 let isCommutable = 1 in
1967 def rri : AVX512AIi8<opc, MRMSrcReg,
1968 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
1969 !strconcat("vpcmp${cc}", Suffix,
1970 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1971 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1973 IIC_SSE_ALU_F32P_RR>, EVEX_4V;
1974 def rmi : AVX512AIi8<opc, MRMSrcMem,
1975 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc),
1976 !strconcat("vpcmp${cc}", Suffix,
1977 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
1978 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
1979 (_.VT (bitconvert (_.LdFrag addr:$src2))),
1981 IIC_SSE_ALU_F32P_RM>, EVEX_4V;
1982 let isCommutable = 1 in
1983 def rrik : AVX512AIi8<opc, MRMSrcReg,
1984 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
1986 !strconcat("vpcmp${cc}", Suffix,
1987 "\t{$src2, $src1, $dst {${mask}}|",
1988 "$dst {${mask}}, $src1, $src2}"),
1989 [(set _.KRC:$dst, (and _.KRCWM:$mask,
1990 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
1992 IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
1993 def rmik : AVX512AIi8<opc, MRMSrcMem,
1994 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
1996 !strconcat("vpcmp${cc}", Suffix,
1997 "\t{$src2, $src1, $dst {${mask}}|",
1998 "$dst {${mask}}, $src1, $src2}"),
1999 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2000 (OpNode (_.VT _.RC:$src1),
2001 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2003 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
2005 // Accept explicit immediate argument form instead of comparison code.
2006 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2007 def rri_alt : AVX512AIi8<opc, MRMSrcReg,
2008 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2009 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2010 "$dst, $src1, $src2, $cc}"),
2011 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
2013 def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
2014 (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2015 !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|",
2016 "$dst, $src1, $src2, $cc}"),
2017 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
2018 def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
2019 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2,
2021 !strconcat("vpcmp", Suffix,
2022 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2023 "$dst {${mask}}, $src1, $src2, $cc}"),
2024 [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
2026 def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
2027 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2,
2029 !strconcat("vpcmp", Suffix,
2030 "\t{$cc, $src2, $src1, $dst {${mask}}|",
2031 "$dst {${mask}}, $src1, $src2, $cc}"),
2032 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
// avx512_icmp_cc_rmb - Inherits every form of avx512_icmp_cc and adds the
// embedded-broadcast memory forms (EVEX_B), whose $src2 is a scalar memory
// operand printed with _.BroadcastStr:
//   rmib / rmibk         : matched against an X86VBroadcast of a scalar load;
//                          rmibk ANDs the result with $mask (EVEX_K)
//   rmib_alt / rmibk_alt : asm-parser-only, empty-pattern forms marked
//                          mayLoad = 1 that print plain "vpcmp" with the
//                          comparison as an explicit immediate
2036 multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode,
2037 X86VectorVTInfo _> :
2038 avx512_icmp_cc<opc, Suffix, OpNode, _> {
2039 def rmib : AVX512AIi8<opc, MRMSrcMem,
2040 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2042 !strconcat("vpcmp${cc}", Suffix,
2043 "\t{${src2}", _.BroadcastStr, ", $src1, $dst|",
2044 "$dst, $src1, ${src2}", _.BroadcastStr, "}"),
2045 [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1),
2046 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2048 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
2049 def rmibk : AVX512AIi8<opc, MRMSrcMem,
2050 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2051 _.ScalarMemOp:$src2, AVX512ICC:$cc),
2052 !strconcat("vpcmp${cc}", Suffix,
2053 "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2054 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"),
2055 [(set _.KRC:$dst, (and _.KRCWM:$mask,
2056 (OpNode (_.VT _.RC:$src1),
2057 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2059 IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
2061 // Accept explicit immediate argument form instead of comparison code.
2062 let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in {
2063 def rmib_alt : AVX512AIi8<opc, MRMSrcMem,
2064 (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2,
2066 !strconcat("vpcmp", Suffix,
2067 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|",
2068 "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2069 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_B;
2070 def rmibk_alt : AVX512AIi8<opc, MRMSrcMem,
2071 (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1,
2072 _.ScalarMemOp:$src2, u8imm:$cc),
2073 !strconcat("vpcmp", Suffix,
2074 "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|",
2075 "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"),
2076 [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B;
// avx512_icmp_cc_vl - Instantiates avx512_icmp_cc at every vector width in
// VTInfo. Z (info512, EVEX_V512) is guarded by `prd`; Z256 and Z128 are
// guarded by both `prd` and HasVLX.
2080 multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode,
2081 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2082 let Predicates = [prd] in
2083 defm Z : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info512>, EVEX_V512;
2085 let Predicates = [prd, HasVLX] in {
2086 defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info256>, EVEX_V256;
2087 defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, VTInfo.info128>, EVEX_V128;
// avx512_icmp_cc_rmb_vl - Same width fan-out as avx512_icmp_cc_vl, but
// instantiates avx512_icmp_cc_rmb so each width also gets the broadcast
// forms. Z is guarded by `prd`; Z256 and Z128 by `prd` and HasVLX.
2091 multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode,
2092 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
2093 let Predicates = [prd] in
2094 defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info512>,
2097 let Predicates = [prd, HasVLX] in {
2098 defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info256>,
2100 defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, VTInfo.info128>,
// Condition-code packed integer compares. The byte and word forms use
// avx512_icmp_cc_vl with HasBWI; the dword and qword forms use
// avx512_icmp_cc_rmb_vl with HasAVX512 and so also get broadcast forms.
// Forms built on X86cmpm use opcodes 0x3F (b/w) and 0x1F (d/q); the
// "u"-suffixed forms built on X86cmpmu use 0x3E and 0x1E. The word and qword
// forms additionally set VEX_W.
2105 defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, avx512vl_i8_info,
2106 HasBWI>, EVEX_CD8<8, CD8VF>;
2107 defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, avx512vl_i8_info,
2108 HasBWI>, EVEX_CD8<8, CD8VF>;
2110 defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, avx512vl_i16_info,
2111 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
2112 defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, avx512vl_i16_info,
2113 HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>;
2115 defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, avx512vl_i32_info,
2116 HasAVX512>, EVEX_CD8<32, CD8VF>;
2117 defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, avx512vl_i32_info,
2118 HasAVX512>, EVEX_CD8<32, CD8VF>;
2120 defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, avx512vl_i64_info,
2121 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
2122 defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info,
2123 HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
// avx512_icmp_cc_packed_lowering - Selection patterns, active under `Preds`,
// for a condition-code packed compare whose _.KVT result is inserted
// (insert_subvector) into an all-zeros mask vector of type NewInf.KVT. Each
// pattern selects the instruction named InstrStr plus a form suffix and wraps
// it in COPY_TO_REGCLASS. One pattern per instruction form:
//   rri  : register/register compare
//   rmi  : compare against a full-vector load
//   rrik : register/register compare ANDed with $mask
//   rmik : load compare ANDed with $mask
2125 multiclass avx512_icmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
2126 SDNode OpNode, string InstrStr,
2127 list<Predicate> Preds> {
2128 let Predicates = Preds in {
2129 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2130 (_.KVT (OpNode (_.VT _.RC:$src1),
2134 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
2139 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2140 (_.KVT (OpNode (_.VT _.RC:$src1),
2141 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2144 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
2149 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2150 (_.KVT (and _.KRCWM:$mask,
2151 (OpNode (_.VT _.RC:$src1),
2155 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrik) _.KRCWM:$mask,
2161 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2162 (_.KVT (and (_.KVT _.KRCWM:$mask),
2163 (_.KVT (OpNode (_.VT _.RC:$src1),
2165 (_.LdFrag addr:$src2))),
2168 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmik) _.KRCWM:$mask,
// avx512_icmp_cc_packed_rmb_lowering - Inherits the four patterns of
// avx512_icmp_cc_packed_lowering and adds two more, under the same `Preds`,
// for the embedded-broadcast forms: the unmasked pattern selects
// InstrStr#rmib and the $mask-ANDed pattern selects InstrStr#rmibk. As in the
// base multiclass, the compare result is inserted into an all-zeros
// NewInf.KVT vector and the instruction is wrapped in COPY_TO_REGCLASS.
2176 multiclass avx512_icmp_cc_packed_rmb_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
2177 SDNode OpNode, string InstrStr,
2178 list<Predicate> Preds>
2179 : avx512_icmp_cc_packed_lowering<_, NewInf, OpNode, InstrStr, Preds> {
2180 let Predicates = Preds in {
2181 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2182 (_.KVT (OpNode (_.VT _.RC:$src1),
2183 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2186 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmib) _.RC:$src1,
2191 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2192 (_.KVT (and (_.KVT _.KRCWM:$mask),
2193 (_.KVT (OpNode (_.VT _.RC:$src1),
2195 (_.ScalarLdFrag addr:$src2)),
2198 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmibk) _.KRCWM:$mask,
// Lowering-pattern instantiations for X86cmpm. Each line pairs a source
// vector type with a destination mask type and names the VPCMP{B,W,D,Q}
// instruction to select. The byte/word rows use the non-broadcast multiclass
// with [HasBWI(, HasVLX)]; the dword/qword rows use the broadcast-aware
// multiclass with [HasAVX512(, HasVLX)].
2207 defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpm,
2208 "VPCMPBZ128", [HasBWI, HasVLX]>;
2209 defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpm,
2210 "VPCMPBZ128", [HasBWI, HasVLX]>;
2212 defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpm,
2213 "VPCMPBZ256", [HasBWI, HasVLX]>;
2216 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpm,
2217 "VPCMPWZ128", [HasBWI, HasVLX]>;
2218 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpm,
2219 "VPCMPWZ128", [HasBWI, HasVLX]>;
2220 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpm,
2221 "VPCMPWZ128", [HasBWI, HasVLX]>;
2223 defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpm,
2224 "VPCMPWZ256", [HasBWI, HasVLX]>;
2225 defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpm,
2226 "VPCMPWZ256", [HasBWI, HasVLX]>;
2228 defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpm,
2229 "VPCMPWZ", [HasBWI]>;
2232 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpm,
2233 "VPCMPDZ128", [HasAVX512, HasVLX]>;
2234 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpm,
2235 "VPCMPDZ128", [HasAVX512, HasVLX]>;
2236 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpm,
2237 "VPCMPDZ128", [HasAVX512, HasVLX]>;
2238 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpm,
2239 "VPCMPDZ128", [HasAVX512, HasVLX]>;
2241 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpm,
2242 "VPCMPDZ256", [HasAVX512, HasVLX]>;
2243 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpm,
2244 "VPCMPDZ256", [HasAVX512, HasVLX]>;
2245 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpm,
2246 "VPCMPDZ256", [HasAVX512, HasVLX]>;
2248 defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpm,
2249 "VPCMPDZ", [HasAVX512]>;
2250 defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpm,
2251 "VPCMPDZ", [HasAVX512]>;
2254 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpm,
2255 "VPCMPQZ128", [HasAVX512, HasVLX]>;
2256 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpm,
2257 "VPCMPQZ128", [HasAVX512, HasVLX]>;
2258 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpm,
2259 "VPCMPQZ128", [HasAVX512, HasVLX]>;
2260 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpm,
2261 "VPCMPQZ128", [HasAVX512, HasVLX]>;
2262 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpm,
2263 "VPCMPQZ128", [HasAVX512, HasVLX]>;
2265 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpm,
2266 "VPCMPQZ256", [HasAVX512, HasVLX]>;
2267 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpm,
2268 "VPCMPQZ256", [HasAVX512, HasVLX]>;
2269 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpm,
2270 "VPCMPQZ256", [HasAVX512, HasVLX]>;
2271 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpm,
2272 "VPCMPQZ256", [HasAVX512, HasVLX]>;
2274 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpm,
2275 "VPCMPQZ", [HasAVX512]>;
2276 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpm,
2277 "VPCMPQZ", [HasAVX512]>;
2278 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpm,
2279 "VPCMPQZ", [HasAVX512]>;
// Instantiations of avx512_icmp_cc_packed_lowering for the X86cmpmu node on
// byte (VPCMPUB*) and word (VPCMPUW*) element vectors. All entries are gated
// on HasBWI; the 128- and 256-bit forms additionally require HasVLX.
2282 defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v32i1_info, X86cmpmu,
2283 "VPCMPUBZ128", [HasBWI, HasVLX]>;
2284 defm : avx512_icmp_cc_packed_lowering<v16i8x_info, v64i1_info, X86cmpmu,
2285 "VPCMPUBZ128", [HasBWI, HasVLX]>;
2287 defm : avx512_icmp_cc_packed_lowering<v32i8x_info, v64i1_info, X86cmpmu,
2288 "VPCMPUBZ256", [HasBWI, HasVLX]>;
2291 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v16i1_info, X86cmpmu,
2292 "VPCMPUWZ128", [HasBWI, HasVLX]>;
2293 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v32i1_info, X86cmpmu,
2294 "VPCMPUWZ128", [HasBWI, HasVLX]>;
2295 defm : avx512_icmp_cc_packed_lowering<v8i16x_info, v64i1_info, X86cmpmu,
2296 "VPCMPUWZ128", [HasBWI, HasVLX]>;
2298 defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v32i1_info, X86cmpmu,
2299 "VPCMPUWZ256", [HasBWI, HasVLX]>;
2300 defm : avx512_icmp_cc_packed_lowering<v16i16x_info, v64i1_info, X86cmpmu,
2301 "VPCMPUWZ256", [HasBWI, HasVLX]>;
2303 defm : avx512_icmp_cc_packed_lowering<v32i16_info, v64i1_info, X86cmpmu,
2304 "VPCMPUWZ", [HasBWI]>;
// Instantiations of avx512_icmp_cc_packed_rmb_lowering for the X86cmpmu node
// on 32-bit integer element vectors, naming the VPCMPUDZ128 / VPCMPUDZ256 /
// VPCMPUDZ instruction prefixes. The 128- and 256-bit forms require HasVLX in
// addition to HasAVX512.
2307 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v8i1_info, X86cmpmu,
2308 "VPCMPUDZ128", [HasAVX512, HasVLX]>;
2309 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v16i1_info, X86cmpmu,
2310 "VPCMPUDZ128", [HasAVX512, HasVLX]>;
2311 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v32i1_info, X86cmpmu,
2312 "VPCMPUDZ128", [HasAVX512, HasVLX]>;
2313 defm : avx512_icmp_cc_packed_rmb_lowering<v4i32x_info, v64i1_info, X86cmpmu,
2314 "VPCMPUDZ128", [HasAVX512, HasVLX]>;
2316 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v16i1_info, X86cmpmu,
2317 "VPCMPUDZ256", [HasAVX512, HasVLX]>;
2318 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v32i1_info, X86cmpmu,
2319 "VPCMPUDZ256", [HasAVX512, HasVLX]>;
2320 defm : avx512_icmp_cc_packed_rmb_lowering<v8i32x_info, v64i1_info, X86cmpmu,
2321 "VPCMPUDZ256", [HasAVX512, HasVLX]>;
2323 defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v32i1_info, X86cmpmu,
2324 "VPCMPUDZ", [HasAVX512]>;
2325 defm : avx512_icmp_cc_packed_rmb_lowering<v16i32_info, v64i1_info, X86cmpmu,
2326 "VPCMPUDZ", [HasAVX512]>;
// Instantiations of avx512_icmp_cc_packed_rmb_lowering for the X86cmpmu node
// on 64-bit integer element vectors, naming the VPCMPUQZ128 / VPCMPUQZ256 /
// VPCMPUQZ instruction prefixes. The 128- and 256-bit forms require HasVLX in
// addition to HasAVX512.
2329 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v4i1_info, X86cmpmu,
2330 "VPCMPUQZ128", [HasAVX512, HasVLX]>;
2331 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v8i1_info, X86cmpmu,
2332 "VPCMPUQZ128", [HasAVX512, HasVLX]>;
2333 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v16i1_info, X86cmpmu,
2334 "VPCMPUQZ128", [HasAVX512, HasVLX]>;
2335 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v32i1_info, X86cmpmu,
2336 "VPCMPUQZ128", [HasAVX512, HasVLX]>;
2337 defm : avx512_icmp_cc_packed_rmb_lowering<v2i64x_info, v64i1_info, X86cmpmu,
2338 "VPCMPUQZ128", [HasAVX512, HasVLX]>;
2340 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v8i1_info, X86cmpmu,
2341 "VPCMPUQZ256", [HasAVX512, HasVLX]>;
2342 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v16i1_info, X86cmpmu,
2343 "VPCMPUQZ256", [HasAVX512, HasVLX]>;
2344 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v32i1_info, X86cmpmu,
2345 "VPCMPUQZ256", [HasAVX512, HasVLX]>;
2346 defm : avx512_icmp_cc_packed_rmb_lowering<v4i64x_info, v64i1_info, X86cmpmu,
2347 "VPCMPUQZ256", [HasAVX512, HasVLX]>;
2349 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v16i1_info, X86cmpmu,
2350 "VPCMPUQZ", [HasAVX512]>;
2351 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v32i1_info, X86cmpmu,
2352 "VPCMPUQZ", [HasAVX512]>;
2353 defm : avx512_icmp_cc_packed_rmb_lowering<v8i64_info, v64i1_info, X86cmpmu,
2354 "VPCMPUQZ", [HasAVX512]>;
2356 multiclass avx512_vcmp_common<X86VectorVTInfo _> {
2358 defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2359 (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
2360 "vcmp${cc}"#_.Suffix,
2361 "$src2, $src1", "$src1, $src2",
2362 (X86cmpm (_.VT _.RC:$src1),
2366 defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2367 (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
2368 "vcmp${cc}"#_.Suffix,
2369 "$src2, $src1", "$src1, $src2",
2370 (X86cmpm (_.VT _.RC:$src1),
2371 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2374 defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
2376 (ins _.RC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc),
2377 "vcmp${cc}"#_.Suffix,
2378 "${src2}"##_.BroadcastStr##", $src1",
2379 "$src1, ${src2}"##_.BroadcastStr,
2380 (X86cmpm (_.VT _.RC:$src1),
2381 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
2383 // Accept explicit immediate argument form instead of comparison code.
2384 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2385 defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2387 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2389 "$cc, $src2, $src1", "$src1, $src2, $cc">;
2391 let mayLoad = 1 in {
2392 defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2394 (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
2396 "$cc, $src2, $src1", "$src1, $src2, $cc">;
2398 defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
2400 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
2402 "$cc, ${src2}"##_.BroadcastStr##", $src1",
2403 "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
2408 multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
2409 // Comparison code form (VCMP[EQ/LT/LE/...]).
2410 defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
2411 (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
2412 "vcmp${cc}"#_.Suffix,
2413 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
2414 (X86cmpmRnd (_.VT _.RC:$src1),
2417 (i32 FROUND_NO_EXC))>, EVEX_B;
2419 let isAsmParserOnly = 1, hasSideEffects = 0 in {
2420 defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
2422 (ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
2424 "$cc, {sae}, $src2, $src1",
2425 "$src1, $src2, {sae}, $cc">, EVEX_B;
2429 multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
2430 let Predicates = [HasAVX512] in {
2431 defm Z : avx512_vcmp_common<_.info512>,
2432 avx512_vcmp_sae<_.info512>, EVEX_V512;
2435 let Predicates = [HasAVX512,HasVLX] in {
2436 defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
2437 defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
// Packed floating-point compare families built from avx512_vcmp.
// VCMPPD uses the f64 vector infos with 64-bit CD8 scaling and VEX_W;
// VCMPPS uses the f32 vector infos with 32-bit CD8 scaling.
2441 defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
2442 AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
2443 defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
2444 AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
2446 multiclass avx512_fcmp_cc_packed_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
2447 string InstrStr, list<Predicate> Preds> {
2448 let Predicates = Preds in {
2449 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2450 (_.KVT (X86cmpm (_.VT _.RC:$src1),
2454 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rri) _.RC:$src1,
2459 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2460 (_.KVT (X86cmpm (_.VT _.RC:$src1),
2461 (_.VT (bitconvert (_.LdFrag addr:$src2))),
2464 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmi) _.RC:$src1,
2469 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2470 (_.KVT (X86cmpm (_.VT _.RC:$src1),
2471 (X86VBroadcast (_.ScalarLdFrag addr:$src2)),
2474 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rmbi) _.RC:$src1,
2481 multiclass avx512_fcmp_cc_packed_sae_lowering<X86VectorVTInfo _, X86KVectorVTInfo NewInf,
2482 string InstrStr, list<Predicate> Preds>
2483 : avx512_fcmp_cc_packed_lowering<_, NewInf, InstrStr, Preds> {
2485 let Predicates = Preds in
2486 def : Pat<(insert_subvector (NewInf.KVT immAllZerosV),
2487 (_.KVT (X86cmpmRnd (_.VT _.RC:$src1),
2490 (i32 FROUND_NO_EXC))),
2492 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr##rrib) _.RC:$src1,
// Instantiations of avx512_fcmp_cc_packed_lowering for 128- and 256-bit f32
// vectors (VCMPPSZ128 / VCMPPSZ256). Each entry pairs the source vector info
// with a mask info that has more elements than the source vector. All entries
// require HasAVX512 and HasVLX.
2500 defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v8i1_info, "VCMPPSZ128",
2501 [HasAVX512, HasVLX]>;
2502 defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v16i1_info, "VCMPPSZ128",
2503 [HasAVX512, HasVLX]>;
2504 defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v32i1_info, "VCMPPSZ128",
2505 [HasAVX512, HasVLX]>;
2506 defm : avx512_fcmp_cc_packed_lowering<v4f32x_info, v64i1_info, "VCMPPSZ128",
2507 [HasAVX512, HasVLX]>;
2509 defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v16i1_info, "VCMPPSZ256",
2510 [HasAVX512, HasVLX]>;
2511 defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v32i1_info, "VCMPPSZ256",
2512 [HasAVX512, HasVLX]>;
2513 defm : avx512_fcmp_cc_packed_lowering<v8f32x_info, v64i1_info, "VCMPPSZ256",
2514 [HasAVX512, HasVLX]>;
2516 defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v32i1_info, "VCMPPSZ",
2518 defm : avx512_fcmp_cc_packed_sae_lowering<v16f32_info, v64i1_info, "VCMPPSZ",
// Instantiations of avx512_fcmp_cc_packed_lowering for 128- and 256-bit f64
// vectors (VCMPPDZ128 / VCMPPDZ256). Each entry pairs the source vector info
// with a mask info that has more elements than the source vector. All entries
// require HasAVX512 and HasVLX.
2522 defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v4i1_info, "VCMPPDZ128",
2523 [HasAVX512, HasVLX]>;
2524 defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v8i1_info, "VCMPPDZ128",
2525 [HasAVX512, HasVLX]>;
2526 defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v16i1_info, "VCMPPDZ128",
2527 [HasAVX512, HasVLX]>;
2528 defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v32i1_info, "VCMPPDZ128",
2529 [HasAVX512, HasVLX]>;
2530 defm : avx512_fcmp_cc_packed_lowering<v2f64x_info, v64i1_info, "VCMPPDZ128",
2531 [HasAVX512, HasVLX]>;
2533 defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v8i1_info, "VCMPPDZ256",
2534 [HasAVX512, HasVLX]>;
2535 defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v16i1_info, "VCMPPDZ256",
2536 [HasAVX512, HasVLX]>;
2537 defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v32i1_info, "VCMPPDZ256",
2538 [HasAVX512, HasVLX]>;
2539 defm : avx512_fcmp_cc_packed_lowering<v4f64x_info, v64i1_info, "VCMPPDZ256",
2540 [HasAVX512, HasVLX]>;
2542 defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v16i1_info, "VCMPPDZ",
2544 defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v32i1_info, "VCMPPDZ",
2546 defm : avx512_fcmp_cc_packed_sae_lowering<v8f64_info, v64i1_info, "VCMPPDZ",
2549 // ----------------------------------------------------------------
2551 // Handle the scalar fpclass instruction: mask = op(reg_scalar, imm)
2552 // op(mem_scalar, imm)
2553 multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2554 X86VectorVTInfo _, Predicate prd> {
2555 let Predicates = [prd] in {
2556 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),//_.KRC:$dst),
2557 (ins _.RC:$src1, i32u8imm:$src2),
2558 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2559 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2560 (i32 imm:$src2)))], NoItinerary>;
2561 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2562 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2563 OpcodeStr##_.Suffix#
2564 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2565 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2566 (OpNode (_.VT _.RC:$src1),
2567 (i32 imm:$src2))))], NoItinerary>, EVEX_K;
2568 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2569 (ins _.MemOp:$src1, i32u8imm:$src2),
2570 OpcodeStr##_.Suffix##
2571 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2573 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
2574 (i32 imm:$src2)))], NoItinerary>;
2575 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2576 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2577 OpcodeStr##_.Suffix##
2578 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2579 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2580 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
2581 (i32 imm:$src2))))], NoItinerary>, EVEX_K;
2585 // Handle the vector fpclass instruction: mask = fpclass(reg_vec, imm)
2586 // fpclass(mem_vec, imm)
2587 // fpclass(broadcast(eltVt), imm)
2588 multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
2589 X86VectorVTInfo _, string mem, string broadcast>{
2590 def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2591 (ins _.RC:$src1, i32u8imm:$src2),
2592 OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2593 [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
2594 (i32 imm:$src2)))], NoItinerary>;
2595 def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
2596 (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
2597 OpcodeStr##_.Suffix#
2598 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2599 [(set _.KRC:$dst,(or _.KRCWM:$mask,
2600 (OpNode (_.VT _.RC:$src1),
2601 (i32 imm:$src2))))], NoItinerary>, EVEX_K;
2602 def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2603 (ins _.MemOp:$src1, i32u8imm:$src2),
2604 OpcodeStr##_.Suffix##mem#
2605 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2606 [(set _.KRC:$dst,(OpNode
2607 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2608 (i32 imm:$src2)))], NoItinerary>;
2609 def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2610 (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
2611 OpcodeStr##_.Suffix##mem#
2612 "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
2613 [(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
2614 (_.VT (bitconvert (_.LdFrag addr:$src1))),
2615 (i32 imm:$src2))))], NoItinerary>, EVEX_K;
2616 def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2617 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
2618 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2619 _.BroadcastStr##", $dst|$dst, ${src1}"
2620 ##_.BroadcastStr##", $src2}",
2621 [(set _.KRC:$dst,(OpNode
2622 (_.VT (X86VBroadcast
2623 (_.ScalarLdFrag addr:$src1))),
2624 (i32 imm:$src2)))], NoItinerary>,EVEX_B;
2625 def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
2626 (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
2627 OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
2628 _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
2629 _.BroadcastStr##", $src2}",
2630 [(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
2631 (_.VT (X86VBroadcast
2632 (_.ScalarLdFrag addr:$src1))),
2633 (i32 imm:$src2))))], NoItinerary>,
2637 multiclass avx512_vector_fpclass_all<string OpcodeStr,
2638 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd,
2640 let Predicates = [prd] in {
2641 defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info512, "{z}",
2642 broadcast>, EVEX_V512;
2644 let Predicates = [prd, HasVLX] in {
2645 defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info128, "{x}",
2646 broadcast>, EVEX_V128;
2647 defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, _.info256, "{y}",
2648 broadcast>, EVEX_V256;
// Builds the four fpclass families from one mnemonic stem:
//   PS / PD - vector forms via avx512_vector_fpclass_all (opcode opcVec,
//             node VecOpNode), with "{l}" / "{q}" passed as the broadcast
//             suffix string.
//   SS / SD - scalar forms via avx512_scalar_fpclass (opcode opcScalar,
//             node ScalarOpNode) on f32x_info / f64x_info.
// The 64-bit element variants (PD, SD) additionally set VEX_W.
// prd is forwarded as the gating predicate to every family.
2652 multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
2653 bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{
2654 defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
2655 VecOpNode, prd, "{l}">, EVEX_CD8<32, CD8VF>;
2656 defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
2657 VecOpNode, prd, "{q}">,EVEX_CD8<64, CD8VF> , VEX_W;
2658 defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2659 f32x_info, prd>, EVEX_CD8<32, CD8VT1>;
2660 defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode,
2661 f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W;
// VFPCLASS{PS,PD,SS,SD}: vector opcode 0x66, scalar opcode 0x67, selected from
// X86Vfpclass / X86Vfpclasss and gated on HasDQI.
2664 defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass,
2665 X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX;
2667 //-----------------------------------------------------------------
2668 // Mask register copy, including
2669 // - copy between mask registers
2670 // - load/store mask registers
2671 // - copy from GPR to mask register and vice versa
// Mask-register move forms for one mask width:
//   kk - mask register to mask register (no selection pattern).
//   km - load a mask of type vvt from memory.
//   mk - store a mask register to memory.
// The assembly string is built with the '#' paste operator.
2673 multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2674 string OpcodeStr, RegisterClass KRC,
2675 ValueType vvt, X86MemOperand x86memop> {
2676 let hasSideEffects = 0 in
2677 def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2678 OpcodeStr#"\t{$src, $dst|$dst, $src}", []>;
2679 def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2680 OpcodeStr#"\t{$src, $dst|$dst, $src}",
2681 [(set KRC:$dst, (vvt (load addr:$src)))]>;
2682 def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2683 OpcodeStr#"\t{$src, $dst|$dst, $src}",
2684 [(store KRC:$src, addr:$dst)]>;
2687 multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2689 RegisterClass KRC, RegisterClass GRC> {
2690 let hasSideEffects = 0 in {
2691 def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2692 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
2693 def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2694 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
2698 let Predicates = [HasDQI] in
2699 defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
2700 avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
2703 let Predicates = [HasAVX512] in
2704 defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
2705 avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
2708 let Predicates = [HasBWI] in {
2709 defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
2711 defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
2713 defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
2715 defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
2719 // GR from/to mask register
// i16 <-> v16i1: the GPR side always goes through a 32-bit register. A GR16
// source is inserted into an undefined i32 before the copy to VK16; in the
// other direction the mask is copied to GR32 and the low 16 bits extracted.
2720 def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
2721 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)), VK16)>;
2722 def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
2723 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK16:$src, GR32)), sub_16bit)>;
// i8 <-> v8i1: same scheme using the 8-bit subregister and VK8.
2725 def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
2726 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$src, sub_8bit)), VK8)>;
2727 def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
2728 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit)>;
// Extending a 16-bit mask to i32: zext selects KMOVWrk, anyext is a plain
// register-class copy to GR32.
2730 def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
2731 (KMOVWrk VK16:$src)>;
2732 def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
2733 (COPY_TO_REGCLASS VK16:$src, GR32)>;
// Extending an 8-bit mask to i32: without DQI the zext copies to GR32 and
// applies MOVZX32rr8 to the low byte; with DQI it selects KMOVBrk. anyext is
// again a plain copy to GR32.
2735 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2736 (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)), sub_8bit))>, Requires<[NoDQI]>;
2737 def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
2738 (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
2739 def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
2740 (COPY_TO_REGCLASS VK8:$src, GR32)>;
// i32 <-> v32i1 and i64 <-> v64i1 are direct register-class copies between
// GR32/VK32 and GR64/VK64; no subregister insert or extract is needed.
2742 def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
2743 (COPY_TO_REGCLASS GR32:$src, VK32)>;
2744 def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
2745 (COPY_TO_REGCLASS VK32:$src, GR32)>;
2746 def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
2747 (COPY_TO_REGCLASS GR64:$src, VK64)>;
2748 def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
2749 (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Mask load/store patterns available with DQI, all selecting KMOVB.
2752 let Predicates = [HasDQI] in {
// 8-bit masks bitcast to/from i8 map directly onto KMOVBmk / KMOVBkm.
2753 def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst),
2754 (KMOVBmk addr:$dst, VK8:$src)>;
2755 def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))),
2756 (KMOVBkm addr:$src)>;
// Stores of masks narrower than 8 bits: copy to VK8, then store a full byte.
2758 def : Pat<(store VK4:$src, addr:$dst),
2759 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>;
2760 def : Pat<(store VK2:$src, addr:$dst),
2761 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>;
2762 def : Pat<(store VK1:$src, addr:$dst),
2763 (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK8))>;
// Loads of v2i1 / v4i1: load a byte with KMOVBkm, then copy to the narrow class.
2765 def : Pat<(v2i1 (load addr:$src)),
2766 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK2)>;
2767 def : Pat<(v4i1 (load addr:$src)),
2768 (COPY_TO_REGCLASS (KMOVBkm addr:$src), VK4)>;
2770 let Predicates = [HasAVX512, NoDQI] in {
2771 def : Pat<(store VK1:$src, addr:$dst),
2773 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)),
2775 def : Pat<(store VK2:$src, addr:$dst),
2777 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK2:$src, GR32)),
2779 def : Pat<(store VK4:$src, addr:$dst),
2781 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK4:$src, GR32)),
2783 def : Pat<(store VK8:$src, addr:$dst),
2785 (i8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK8:$src, GR32)),
2788 def : Pat<(v8i1 (load addr:$src)),
2789 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
2790 def : Pat<(v2i1 (load addr:$src)),
2791 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
2792 def : Pat<(v4i1 (load addr:$src)),
2793 (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
// Mask load/store patterns available with base AVX-512.
2796 let Predicates = [HasAVX512] in {
// 16-bit mask store via KMOVWmk.
2797 def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
2798 (KMOVWmk addr:$dst, VK16:$src)>;
// v1i1 load: zero-extending byte load, masked down to bit 0 with AND 1, then
// copied to VK1.
2799 def : Pat<(v1i1 (load addr:$src)),
2800 (COPY_TO_REGCLASS (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
// 16-bit mask load via KMOVWkm.
2801 def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
2802 (KMOVWkm addr:$src)>;
// 32- and 64-bit mask loads/stores, available with BWI: masks bitcast to/from
// i32 use KMOVD, masks bitcast to/from i64 use KMOVQ.
2804 let Predicates = [HasBWI] in {
2805 def : Pat<(store (i32 (bitconvert (v32i1 VK32:$src))), addr:$dst),
2806 (KMOVDmk addr:$dst, VK32:$src)>;
2807 def : Pat<(v32i1 (bitconvert (i32 (load addr:$src)))),
2808 (KMOVDkm addr:$src)>;
2809 def : Pat<(store (i64 (bitconvert (v64i1 VK64:$src))), addr:$dst),
2810 (KMOVQmk addr:$dst, VK64:$src)>;
2811 def : Pat<(v64i1 (bitconvert (i64 (load addr:$src)))),
2812 (KMOVQkm addr:$src)>;
2815 let Predicates = [HasAVX512] in {
// Patterns that move a scalar between a GPR and element 0 of a mask vector of
// type maskVT held in register class maskRC.
2816 multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> {
// scalar_to_vector from GR32 is a plain register-class copy.
2817 def : Pat<(maskVT (scalar_to_vector GR32:$src)),
2818 (COPY_TO_REGCLASS GR32:$src, maskRC)>;
// Extracting element 0 as i32 is a plain copy back to GR32.
2820 def : Pat<(i32 (X86Vextract maskRC:$src, (iPTR 0))),
2821 (COPY_TO_REGCLASS maskRC:$src, GR32)>;
// scalar_to_vector from GR8 first widens to an i32 with undefined upper bits.
2823 def : Pat<(maskVT (scalar_to_vector GR8:$src)),
2824 (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
// Extracting element 0 as i8 copies to GR32 and takes the low byte.
2826 def : Pat<(i8 (X86Vextract maskRC:$src, (iPTR 0))),
2827 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>;
// anyext of the i8 extract needs no subregister step: copy straight to GR32.
2829 def : Pat<(i32 (anyext (i8 (X86Vextract maskRC:$src, (iPTR 0))))),
2830 (COPY_TO_REGCLASS maskRC:$src, GR32)>;
// Instantiate the GPR <-> mask element-0 copy patterns for every mask width
// from v1i1 through v64i1.
2833 defm : operation_gpr_mask_copy_lowering<VK1, v1i1>;
2834 defm : operation_gpr_mask_copy_lowering<VK2, v2i1>;
2835 defm : operation_gpr_mask_copy_lowering<VK4, v4i1>;
2836 defm : operation_gpr_mask_copy_lowering<VK8, v8i1>;
2837 defm : operation_gpr_mask_copy_lowering<VK16, v16i1>;
2838 defm : operation_gpr_mask_copy_lowering<VK32, v32i1>;
2839 defm : operation_gpr_mask_copy_lowering<VK64, v64i1>;
2841 def : Pat<(X86kshiftr (X86kshiftl (v1i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2843 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2844 GR8:$src, sub_8bit), (i32 1))), VK1)>;
2845 def : Pat<(X86kshiftr (X86kshiftl (v16i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2847 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2848 GR8:$src, sub_8bit), (i32 1))), VK16)>;
2849 def : Pat<(X86kshiftr (X86kshiftl (v8i1 (scalar_to_vector GR8:$src)), (i8 15)), (i8 15)) ,
2851 (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
2852 GR8:$src, sub_8bit), (i32 1))), VK8)>;
2856 // Mask unary operation
2858 multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
2859 RegisterClass KRC, SDPatternOperator OpNode,
2861 let Predicates = [prd] in
2862 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2863 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864 [(set KRC:$dst, (OpNode KRC:$src))]>;
2867 multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
2868 SDPatternOperator OpNode> {
2869 defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
2871 defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
2872 HasAVX512>, VEX, PS;
2873 defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
2874 HasBWI>, VEX, PD, VEX_W;
2875 defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
2876 HasBWI>, VEX, PS, VEX_W;
2879 defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
2881 // KNL does not support KNOTB, 8-bit mask is promoted to 16-bit
// vnot on an 8-bit mask without DQI: copy to VK16, apply KNOTWrr, copy the
// result back to VK8. The 'let' below covers only this first pattern.
2882 let Predicates = [HasAVX512, NoDQI] in
2883 def : Pat<(vnot VK8:$src),
2884 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
// vnot on 4- and 2-bit masks uses the same widen-to-VK16 scheme and copies
// the result back to the original register class.
2886 def : Pat<(vnot VK4:$src),
2887 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
2888 def : Pat<(vnot VK2:$src),
2889 (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
2891 // Mask binary operation
2892 // - KAND, KANDN, KOR, KXNOR, KXOR, KADD
// One register-register mask instruction: $dst = OpNode($src1, $src2), all in
// register class KRC. prd gates the instruction and IsCommutable is forwarded
// to the isCommutable flag. The assembly string is built with the '#' paste
// operator.
2893 multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
2894 RegisterClass KRC, SDPatternOperator OpNode,
2895 Predicate prd, bit IsCommutable> {
2896 let Predicates = [prd], isCommutable = IsCommutable in
2897 def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
2898 OpcodeStr#
2899 "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
2900 [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
// Expands a two-operand mask operation into its four width variants by
// appending "b"/"w"/"d"/"q" to the mnemonic stem:
//   B - VK8,  gated on HasDQI
//   W - VK16, gated on prdW (HasAVX512 unless overridden)
//   D - VK32, gated on HasBWI
//   Q - VK64, gated on HasBWI
// The suffix is attached with the '#' paste operator.
2903 multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
2904 SDPatternOperator OpNode, bit IsCommutable,
2905 Predicate prdW = HasAVX512> {
2906 defm B : avx512_mask_binop<opc, OpcodeStr#"b", VK8, OpNode,
2907 HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
2908 defm W : avx512_mask_binop<opc, OpcodeStr#"w", VK16, OpNode,
2909 prdW, IsCommutable>, VEX_4V, VEX_L, PS;
2910 defm D : avx512_mask_binop<opc, OpcodeStr#"d", VK32, OpNode,
2911 HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
2912 defm Q : avx512_mask_binop<opc, OpcodeStr#"q", VK64, OpNode,
2913 HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
// andn(a, b) = (not a) & b ; xnor(a, b) = not (a ^ b).
2916 def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
2917 def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
2918 // These nodes use 'vnot' instead of 'not' to support vectors.
2919 def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
2920 def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
// Mask binary instruction families. The last template argument before the
// optional predicate is the commutable flag: only KANDN is non-commutable.
// KADD overrides the W-form predicate with HasDQI instead of the default.
2922 defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
2923 defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
2924 defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
2925 defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
2926 defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
2927 defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
2929 multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
2931 // With AVX512F, 8-bit mask is promoted to 16-bit mask,
2932 // for the DQI set, this type is legal and KxxxB instruction is used
2933 let Predicates = [NoDQI] in
2934 def : Pat<(VOpNode VK8:$src1, VK8:$src2),
2936 (Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
2937 (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
2939 // All types smaller than 8 bits require conversion anyway
2940 def : Pat<(OpNode VK1:$src1, VK1:$src2),
2941 (COPY_TO_REGCLASS (Inst
2942 (COPY_TO_REGCLASS VK1:$src1, VK16),
2943 (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
// 2-bit mask operation: widen both operands to VK16, apply Inst, and copy the
// result back to VK2 so that the result register class matches the v2i1
// operands (the result was previously copied to VK1).
2944 def : Pat<(VOpNode VK2:$src1, VK2:$src2),
2945 (COPY_TO_REGCLASS (Inst
2946 (COPY_TO_REGCLASS VK2:$src1, VK16),
2947 (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
// 4-bit mask operation: widen both operands to VK16, apply Inst, and copy the
// result back to VK4 so that the result register class matches the v4i1
// operands (the result was previously copied to VK1).
2948 def : Pat<(VOpNode VK4:$src1, VK4:$src2),
2949 (COPY_TO_REGCLASS (Inst
2950 (COPY_TO_REGCLASS VK4:$src1, VK16),
2951 (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
// Narrow-mask promotion patterns for each logical operation, all implemented
// with the 16-bit (W) instruction. The first argument is the vector-aware
// node, the second the scalar-style node (they differ only for andn/xnor).
2954 defm : avx512_binop_pat<and, and, KANDWrr>;
2955 defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
2956 defm : avx512_binop_pat<or, or, KORWrr>;
2957 defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
2958 defm : avx512_binop_pat<xor, xor, KXORWrr>;
2961 multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
2962 RegisterClass KRCSrc, Predicate prd> {
2963 let Predicates = [prd] in {
2964 let hasSideEffects = 0 in
2965 def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
2966 (ins KRC:$src1, KRC:$src2),
2967 "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
2970 def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
2971 (!cast<Instruction>(NAME##rr)
2972 (COPY_TO_REGCLASS KRCSrc:$src2, KRC),
2973 (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>;
// Mask unpack families: each concatenates two source masks of the narrower
// class into the wider one (VK8 -> VK16, VK16 -> VK32, VK32 -> VK64).
// KUNPCKBW needs only HasAVX512; the wider two require HasBWI.
2977 defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
2978 defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
2979 defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
// One mask test instruction: takes two KRC operands, produces no register
// output, and defines EFLAGS from OpNode($src1, $src2). prd gates the
// instruction. The assembly string is built with the '#' paste operator.
2982 multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
2983 SDNode OpNode, Predicate prd> {
2984 let Predicates = [prd], Defs = [EFLAGS] in
2985 def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
2986 OpcodeStr#"\t{$src2, $src1|$src1, $src2}",
2987 [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
2990 multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
2991 Predicate prdW = HasAVX512> {
2992 defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
2994 defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
2996 defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
2998 defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
// Mask test families. KORTEST keeps the default W-form predicate (HasAVX512);
// KTEST overrides it with HasDQI.
3002 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
3003 defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
3006 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
3008 let Predicates = [HasAVX512] in
3009 def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
3010 !strconcat(OpcodeStr,
3011 "\t{$imm, $src, $dst|$dst, $src, $imm}"),
3012 [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
3015 multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
3017 defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
3019 let Predicates = [HasDQI] in
3020 defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
3022 let Predicates = [HasBWI] in {
3023 defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
3025 defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
// Mask shift families. The two opcodes are passed as opc1 / opc2 to
// avx512_mask_shiftop_w, which uses opc1 for the W and B forms and opc2 for
// the Q and D forms.
3030 defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
3031 defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
3033 multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
3034 def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
3035 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrr)
3036 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3037 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
3039 def : Pat<(insert_subvector (v16i1 immAllZerosV),
3040 (v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
3042 (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrr)
3043 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3044 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
3047 def : Pat<(insert_subvector (v16i1 immAllZerosV),
3048 (v8i1 (and VK8:$mask,
3049 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2)))),
3051 (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrk)
3052 (COPY_TO_REGCLASS VK8:$mask, VK16),
3053 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3054 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
3058 multiclass axv512_icmp_packed_cc_no_vlx_lowering<SDNode OpNode, string InstStr,
3059 AVX512VLVectorVTInfo _> {
3060 def : Pat<(v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
3061 (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrri)
3062 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3063 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3066 def : Pat<(insert_subvector (v16i1 immAllZerosV),
3067 (v8i1 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc)),
3069 (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrri)
3070 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3071 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
3075 def : Pat<(insert_subvector (v16i1 immAllZerosV),
3076 (v8i1 (and VK8:$mask,
3077 (OpNode (_.info256.VT VR256X:$src1), (_.info256.VT VR256X:$src2), imm:$cc))),
3079 (KSHIFTRWri (KSHIFTLWri (!cast<Instruction>(InstStr##Zrrik)
3080 (COPY_TO_REGCLASS VK8:$mask, VK16),
3081 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
3082 (_.info512.VT (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
// Without VLX, 256-bit compares are selected through the 512-bit instruction
// forms (the multiclasses above append "Z..." to the given instruction stem).
// Covers PCMPGT/PCMPEQ on v8i32 and the condition-code compares for f32 and
// signed/unsigned i32 elements.
3087 let Predicates = [HasAVX512, NoVLX] in {
3088 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD">;
3089 defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD">;
3091 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", avx512vl_f32_info>;
3092 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", avx512vl_i32_info>;
3093 defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUD", avx512vl_i32_info>;
3096 // Mask setting all 0s or 1s
// Defines a pseudo instruction (opcode 0, empty asm string, no inputs) that
// materializes the constant Val of type VT into a KRC register. The record is
// named after the instantiating defm (#NAME#) and is marked rematerializable
// and as cheap as a move.
3097 multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
3098 let Predicates = [HasAVX512] in
3099 let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
3100 def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
3101 [(set KRC:$dst, (VT Val))]>;
// Instantiates the constant-mask pseudo for the 16-, 32- and 64-bit mask
// widths (suffixes W, D, Q). There is no 8-bit form here.
3104 multiclass avx512_mask_setop_w<PatFrag Val> {
3105 defm W : avx512_mask_setop<VK16, v16i1, Val>;
3106 defm D : avx512_mask_setop<VK32, v32i1, Val>;
3107 defm Q : avx512_mask_setop<VK64, v64i1, Val>;
3110 defm KSET0 : avx512_mask_setop_w<immAllZerosV>;
3111 defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
3113 // With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
// Constant all-zeros / all-ones masks narrower than 16 bits (v8i1 down to
// v1i1) are therefore produced with the 16-bit KSET0W / KSET1W pseudo and the
// result is copied into the matching narrower mask register class.
3114 let Predicates = [HasAVX512] in {
3115 def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
3116 def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
3117 def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
3118 def : Pat<(v1i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK1)>;
3119 def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
3120 def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
3121 def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
3122 def : Pat<(v1i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK1)>;
3125 // Patterns for kmask insert_subvector/extract_subvector to/from index=0
// Both directions are selected to a plain COPY_TO_REGCLASS between the two
// mask register classes; no shift or mask instruction is emitted.
//   subRC / subVT - register class and value type of the narrower mask.
//   RC / VT       - register class and value type of the wider mask.
3126 multiclass operation_subvector_mask_lowering<RegisterClass subRC, ValueType subVT,
3127 RegisterClass RC, ValueType VT> {
// Extracting the low subVT elements of a VT mask.
3128 def : Pat<(subVT (extract_subvector (VT RC:$src), (iPTR 0))),
3129 (subVT (COPY_TO_REGCLASS RC:$src, subRC))>;
// Inserting a subVT mask at element 0 of an undef VT mask.
3131 def : Pat<(VT (insert_subvector undef, subRC:$src, (iPTR 0))),
3132 (VT (COPY_TO_REGCLASS subRC:$src, RC))>;
// Instantiate the index-0 insert/extract lowering for every (narrower, wider)
// pair of mask types from v1i1 up to v64i1, grouped by the narrower type.
3134 defm : operation_subvector_mask_lowering<VK1, v1i1, VK2, v2i1>;
3135 defm : operation_subvector_mask_lowering<VK1, v1i1, VK4, v4i1>;
3136 defm : operation_subvector_mask_lowering<VK1, v1i1, VK8, v8i1>;
3137 defm : operation_subvector_mask_lowering<VK1, v1i1, VK16, v16i1>;
3138 defm : operation_subvector_mask_lowering<VK1, v1i1, VK32, v32i1>;
3139 defm : operation_subvector_mask_lowering<VK1, v1i1, VK64, v64i1>;
3141 defm : operation_subvector_mask_lowering<VK2, v2i1, VK4, v4i1>;
3142 defm : operation_subvector_mask_lowering<VK2, v2i1, VK8, v8i1>;
3143 defm : operation_subvector_mask_lowering<VK2, v2i1, VK16, v16i1>;
3144 defm : operation_subvector_mask_lowering<VK2, v2i1, VK32, v32i1>;
3145 defm : operation_subvector_mask_lowering<VK2, v2i1, VK64, v64i1>;
3147 defm : operation_subvector_mask_lowering<VK4, v4i1, VK8, v8i1>;
3148 defm : operation_subvector_mask_lowering<VK4, v4i1, VK16, v16i1>;
3149 defm : operation_subvector_mask_lowering<VK4, v4i1, VK32, v32i1>;
3150 defm : operation_subvector_mask_lowering<VK4, v4i1, VK64, v64i1>;
3152 defm : operation_subvector_mask_lowering<VK8, v8i1, VK16, v16i1>;
3153 defm : operation_subvector_mask_lowering<VK8, v8i1, VK32, v32i1>;
3154 defm : operation_subvector_mask_lowering<VK8, v8i1, VK64, v64i1>;
3156 defm : operation_subvector_mask_lowering<VK16, v16i1, VK32, v32i1>;
3157 defm : operation_subvector_mask_lowering<VK16, v16i1, VK64, v64i1>;
3159 defm : operation_subvector_mask_lowering<VK32, v32i1, VK64, v64i1>;
3161 def : Pat<(v2i1 (extract_subvector (v4i1 VK4:$src), (iPTR 2))),
3162 (v2i1 (COPY_TO_REGCLASS
3163 (KSHIFTRWri (COPY_TO_REGCLASS VK4:$src, VK16), (i8 2)),
3165 def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 4))),
3166 (v4i1 (COPY_TO_REGCLASS
3167 (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (i8 4)),
// Extracting the upper half of a 16-, 32- or 64-bit mask: shift the source
// right by half its width with the KSHIFTR variant of matching size, then copy
// the result into the narrower mask register class.
3169 def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
3170 (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
3171 def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
3172 (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>;
3173 def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
3174 (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
3177 // Patterns for kmask shift
3178 multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
3179 def : Pat<(VT (X86kshiftl RC:$src, (i8 imm:$imm))),
3180 (VT (COPY_TO_REGCLASS
3181 (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
3184 def : Pat<(VT (X86kshiftr RC:$src, (i8 imm:$imm))),
3185 (VT (COPY_TO_REGCLASS
3186 (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
// Instantiate the mask-shift lowering for masks narrower than 16 bits. The
// v8i1 variant is additionally gated on NoDQI; v4i1 and v2i1 only require
// HasAVX512.
3191 defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
3192 defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
3193 defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
3194 //===----------------------------------------------------------------------===//
3195 // AVX-512 - Aligned and unaligned load and store
3199 multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
3200 PatFrag ld_frag, PatFrag mload,
3201 SDPatternOperator SelectOprr = vselect> {
3202 let hasSideEffects = 0 in {
3203 def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
3204 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
3206 def rrkz : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3207 (ins _.KRCWM:$mask, _.RC:$src),
3208 !strconcat(OpcodeStr, "\t{$src, ${dst} {${mask}} {z}|",
3209 "${dst} {${mask}} {z}, $src}"),
3210 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3212 _.ImmAllZerosV)))], _.ExeDomain>,
3215 let canFoldAsLoad = 1, isReMaterializable = 1,
3216 SchedRW = [WriteLoad] in
3217 def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
3218 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3219 [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
3222 let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
3223 def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
3224 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
3225 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3226 "${dst} {${mask}}, $src1}"),
3227 [(set _.RC:$dst, (_.VT (SelectOprr _.KRCWM:$mask,
3229 (_.VT _.RC:$src0))))], _.ExeDomain>,
3231 let SchedRW = [WriteLoad] in
3232 def rmk : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3233 (ins _.RC:$src0, _.KRCWM:$mask, _.MemOp:$src1),
3234 !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
3235 "${dst} {${mask}}, $src1}"),
3236 [(set _.RC:$dst, (_.VT
3237 (vselect _.KRCWM:$mask,
3238 (_.VT (bitconvert (ld_frag addr:$src1))),
3239 (_.VT _.RC:$src0))))], _.ExeDomain>, EVEX, EVEX_K;
3241 let SchedRW = [WriteLoad] in
3242 def rmkz : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst),
3243 (ins _.KRCWM:$mask, _.MemOp:$src),
3244 OpcodeStr #"\t{$src, ${dst} {${mask}} {z}|"#
3245 "${dst} {${mask}} {z}, $src}",
3246 [(set _.RC:$dst, (_.VT (vselect _.KRCWM:$mask,
3247 (_.VT (bitconvert (ld_frag addr:$src))), _.ImmAllZerosV)))],
3248 _.ExeDomain>, EVEX, EVEX_KZ;
3250 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, undef)),
3251 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3253 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, _.ImmAllZerosV)),
3254 (!cast<Instruction>(NAME#_.ZSuffix##rmkz) _.KRCWM:$mask, addr:$ptr)>;
3256 def : Pat<(_.VT (mload addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src0))),
3257 (!cast<Instruction>(NAME#_.ZSuffix##rmk) _.RC:$src0,
3258 _.KRCWM:$mask, addr:$ptr)>;
3261 multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
3262 AVX512VLVectorVTInfo _,
3264 let Predicates = [prd] in
3265 defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
3266 masked_load_aligned512>, EVEX_V512;
3268 let Predicates = [prd, HasVLX] in {
3269 defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
3270 masked_load_aligned256>, EVEX_V256;
3271 defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
3272 masked_load_aligned128>, EVEX_V128;
3276 multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
3277 AVX512VLVectorVTInfo _,
3279 SDPatternOperator SelectOprr = vselect> {
3280 let Predicates = [prd] in
3281 defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
3282 masked_load_unaligned, SelectOprr>, EVEX_V512;
3284 let Predicates = [prd, HasVLX] in {
3285 defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
3286 masked_load_unaligned, SelectOprr>, EVEX_V256;
3287 defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
3288 masked_load_unaligned, SelectOprr>, EVEX_V128;
// avx512_store defines the store-direction records of a vector move:
//   opc       - opcode byte shared by all records.
//   OpcodeStr - mnemonic used to build the asm strings.
//   _         - vector type info (register class, mask class, memory operand).
//   st_frag   - store fragment matched by the unmasked memory form (mr).
//   mstore    - masked-store fragment mapped onto the masked memory form (mrk).
//   Name      - record-name prefix used to build the FoldGenData references.
3292 multiclass avx512_store<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
3293 PatFrag st_frag, PatFrag mstore, string Name> {
// Register-to-register records in the MRMDestReg form. They carry no
// selection pattern and print with a ".s" mnemonic suffix.
// NOTE(review): FoldGenData<Name#rr...> names the corresponding non-_REV
// record; presumably consumed by the folding-table generator - confirm.
3295 let hasSideEffects = 0 in {
3296 def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
3297 OpcodeStr # ".s\t{$src, $dst|$dst, $src}",
3298 [], _.ExeDomain>, EVEX, FoldGenData<Name#rr>;
3299 def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3300 (ins _.KRCWM:$mask, _.RC:$src),
3301 OpcodeStr # ".s\t{$src, ${dst} {${mask}}|"#
3302 "${dst} {${mask}}, $src}",
3303 [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData<Name#rrk>;
3304 def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
3305 (ins _.KRCWM:$mask, _.RC:$src),
3306 OpcodeStr # ".s\t{$src, ${dst} {${mask}} {z}|" #
3307 "${dst} {${mask}} {z}, $src}",
3308 [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData<Name#rrkz>;
// Unmasked store to memory, selected directly from st_frag.
3311 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
3312 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
3313 [(st_frag (_.VT _.RC:$src), addr:$dst)], _.ExeDomain>, EVEX;
// Masked store to memory; it has no inline pattern and is selected through
// the separate Pat that follows.
3314 def mrk : AVX512PI<opc, MRMDestMem, (outs),
3315 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
3316 OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
3317 [], _.ExeDomain>, EVEX, EVEX_K;
// Map the masked-store fragment onto the mrk record of this instantiation.
3319 def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)),
3320 (!cast<Instruction>(NAME#_.ZSuffix##mrk) addr:$ptr,
3321 _.KRCWM:$mask, _.RC:$src)>;
3325 multiclass avx512_store_vl< bits<8> opc, string OpcodeStr,
3326 AVX512VLVectorVTInfo _, Predicate prd,
3328 let Predicates = [prd] in
3329 defm Z : avx512_store<opc, OpcodeStr, _.info512, store,
3330 masked_store_unaligned, Name#Z>, EVEX_V512;
3332 let Predicates = [prd, HasVLX] in {
3333 defm Z256 : avx512_store<opc, OpcodeStr, _.info256, store,
3334 masked_store_unaligned, Name#Z256>, EVEX_V256;
3335 defm Z128 : avx512_store<opc, OpcodeStr, _.info128, store,
3336 masked_store_unaligned, Name#Z128>, EVEX_V128;
3340 multiclass avx512_alignedstore_vl<bits<8> opc, string OpcodeStr,
3341 AVX512VLVectorVTInfo _, Predicate prd,
3343 let Predicates = [prd] in
3344 defm Z : avx512_store<opc, OpcodeStr, _.info512, alignedstore512,
3345 masked_store_aligned512, Name#Z>, EVEX_V512;
3347 let Predicates = [prd, HasVLX] in {
3348 defm Z256 : avx512_store<opc, OpcodeStr, _.info256, alignedstore256,
3349 masked_store_aligned256, Name#Z256>, EVEX_V256;
3350 defm Z128 : avx512_store<opc, OpcodeStr, _.info128, alignedstore,
3351 masked_store_aligned128, Name#Z128>, EVEX_V128;
3355 defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info,
3357 avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info,
3358 HasAVX512, "VMOVAPS">,
3359 PS, EVEX_CD8<32, CD8VF>;
3361 defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
3363 avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
3364 HasAVX512, "VMOVAPD">,
3365 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3367 defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
3369 avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
3371 PS, EVEX_CD8<32, CD8VF>;
3373 defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
3375 avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
3377 PD, VEX_W, EVEX_CD8<64, CD8VF>;
3379 defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
3381 avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info,
3382 HasAVX512, "VMOVDQA32">,
3383 PD, EVEX_CD8<32, CD8VF>;
3385 defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
3387 avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
3388 HasAVX512, "VMOVDQA64">,
3389 PD, VEX_W, EVEX_CD8<64, CD8VF>;
// Unaligned byte- and word-element integer moves. Both combine the load
// records (opcode 0x6F) with the store records (opcode 0x7F), are predicated
// on HasBWI and use the XD prefix; the 16-bit variant additionally sets VEX_W.
3391 defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
3392 avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
3393 HasBWI, "VMOVDQU8">,
3394 XD, EVEX_CD8<8, CD8VF>;
3396 defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
3397 avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
3398 HasBWI, "VMOVDQU16">,
3399 XD, VEX_W, EVEX_CD8<16, CD8VF>;
3401 defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
3403 avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
3404 HasAVX512, "VMOVDQU32">,
3405 XS, EVEX_CD8<32, CD8VF>;
3407 defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
3409 avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
3410 HasAVX512, "VMOVDQU64">,
3411 XS, VEX_W, EVEX_CD8<64, CD8VF>;
3413 // Special instructions to help with spilling when we don't have VLX. We need
3414 // to load or store from a ZMM register instead. These are converted in
3415 // expandPostRAPseudos.
3416 let isReMaterializable = 1, canFoldAsLoad = 1,
3417 isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
3418 def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3420 def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3422 def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
3424 def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
3428 let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
3429 def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3431 def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3433 def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
3435 def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
3439 def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
3440 (v8i64 VR512:$src))),
3441 (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
// vselect(mask, 0, src) yields $src only where the mask bit is clear, so it is
// selected to the zero-masking register move (rrkz) driven by the inverted
// mask; KNOTWrr performs the inversion.
3444 def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
3445 (v16i32 VR512:$src))),
3446 (VMOVDQA32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
3448 // These patterns exist to prevent the above patterns from introducing a second
3449 // mask inversion when one already exists.
// When the select condition is already `xor mask, all-ones`, the un-inverted
// mask is passed straight to the zero-masking move and no KNOT is emitted.
3450 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
3451 (bc_v8i64 (v16i32 immAllZerosV)),
3452 (v8i64 VR512:$src))),
3453 (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
3454 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
3455 (v16i32 immAllZerosV),
3456 (v16i32 VR512:$src))),
3457 (VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
3459 // Patterns for handling v8i1 selects of 256-bit vectors when VLX isn't
3460 // available. Use a 512-bit operation and extract.
3461 let Predicates = [HasAVX512, NoVLX] in {
3462 def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
3463 (v8f32 VR256X:$src0))),
3467 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
3468 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
3469 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
3472 def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
3473 (v8i32 VR256X:$src0))),
3477 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src0, sub_ymm)),
3478 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
3479 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))),
// With VLX but without BWI, stores of byte- and word-element vectors are
// selected to the 32-bit-element integer moves (VMOVDQA32 for the aligned
// fragments, VMOVDQU32 for plain stores) of the matching vector width.
3483 let Predicates = [HasVLX, NoBWI] in {
3484 // 128-bit load/store without BWI.
3485 def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
3486 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3487 def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
3488 (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
3489 def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
3490 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3491 def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
3492 (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
3494 // 256-bit load/store without BWI.
3495 def : Pat<(alignedstore256 (v16i16 VR256X:$src), addr:$dst),
3496 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3497 def : Pat<(alignedstore256 (v32i8 VR256X:$src), addr:$dst),
3498 (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
3499 def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
3500 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3501 def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
3502 (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
3505 let Predicates = [HasVLX] in {
3506 // Special patterns for storing subvector extracts of lower 128-bits of 256.
3507 // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
// Aligned stores: the low xmm subregister of the 256-bit source is stored
// with the 128-bit aligned move of the matching domain. v8i16 and v16i8 use
// the 32-bit-element integer move.
3508 def : Pat<(alignedstore (v2f64 (extract_subvector
3509 (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
3510 (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3511 def : Pat<(alignedstore (v4f32 (extract_subvector
3512 (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
3513 (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3514 def : Pat<(alignedstore (v2i64 (extract_subvector
3515 (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
3516 (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3517 def : Pat<(alignedstore (v4i32 (extract_subvector
3518 (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
3519 (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3520 def : Pat<(alignedstore (v8i16 (extract_subvector
3521 (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
3522 (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3523 def : Pat<(alignedstore (v16i8 (extract_subvector
3524 (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
3525 (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
// Unaligned stores: same shape, selecting the 128-bit unaligned moves.
3527 def : Pat<(store (v2f64 (extract_subvector
3528 (v4f64 VR256X:$src), (iPTR 0))), addr:$dst),
3529 (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3530 def : Pat<(store (v4f32 (extract_subvector
3531 (v8f32 VR256X:$src), (iPTR 0))), addr:$dst),
3532 (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3533 def : Pat<(store (v2i64 (extract_subvector
3534 (v4i64 VR256X:$src), (iPTR 0))), addr:$dst),
3535 (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3536 def : Pat<(store (v4i32 (extract_subvector
3537 (v8i32 VR256X:$src), (iPTR 0))), addr:$dst),
3538 (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3539 def : Pat<(store (v8i16 (extract_subvector
3540 (v16i16 VR256X:$src), (iPTR 0))), addr:$dst),
3541 (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3542 def : Pat<(store (v16i8 (extract_subvector
3543 (v32i8 VR256X:$src), (iPTR 0))), addr:$dst),
3544 (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256X:$src,sub_xmm)))>;
3546 // Special patterns for storing subvector extracts of lower 128-bits of 512.
3547 // It's cheaper to just use VMOVAPS/VMOVUPS instead of a 128-bit subvector-extract store (VEXTRACT*).
// Aligned stores: the low xmm subregister of the 512-bit source is stored
// with the 128-bit aligned move of the matching domain. v8i16 and v16i8 use
// the 32-bit-element integer move.
3548 def : Pat<(alignedstore (v2f64 (extract_subvector
3549 (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
3550 (VMOVAPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3551 def : Pat<(alignedstore (v4f32 (extract_subvector
3552 (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
3553 (VMOVAPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3554 def : Pat<(alignedstore (v2i64 (extract_subvector
3555 (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
3556 (VMOVDQA64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3557 def : Pat<(alignedstore (v4i32 (extract_subvector
3558 (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
3559 (VMOVDQA32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3560 def : Pat<(alignedstore (v8i16 (extract_subvector
3561 (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
3562 (VMOVDQA32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3563 def : Pat<(alignedstore (v16i8 (extract_subvector
3564 (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
3565 (VMOVDQA32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
// Unaligned stores: same shape, selecting the 128-bit unaligned moves.
3567 def : Pat<(store (v2f64 (extract_subvector
3568 (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
3569 (VMOVUPDZ128mr addr:$dst, (v2f64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3570 def : Pat<(store (v4f32 (extract_subvector
3571 (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
3572 (VMOVUPSZ128mr addr:$dst, (v4f32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3573 def : Pat<(store (v2i64 (extract_subvector
3574 (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
3575 (VMOVDQU64Z128mr addr:$dst, (v2i64 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3576 def : Pat<(store (v4i32 (extract_subvector
3577 (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
3578 (VMOVDQU32Z128mr addr:$dst, (v4i32 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3579 def : Pat<(store (v8i16 (extract_subvector
3580 (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
3581 (VMOVDQU32Z128mr addr:$dst, (v8i16 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3582 def : Pat<(store (v16i8 (extract_subvector
3583 (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
3584 (VMOVDQU32Z128mr addr:$dst, (v16i8 (EXTRACT_SUBREG VR512:$src,sub_xmm)))>;
3586 // Special patterns for storing subvector extracts of lower 256-bits of 512.
3587 // It's cheaper to just use VMOVAPS/VMOVUPS instead of a 256-bit subvector-extract store (VEXTRACT*).
// Aligned stores of the lower 256 bits of a 512-bit register: the low ymm
// subregister is stored with the 256-bit aligned move of the matching domain.
// Every pattern here must match alignedstore256, the fragment this file uses
// for all other 256-bit aligned stores (e.g. the Z256 variant in
// avx512_alignedstore_vl); the 128-bit alignedstore fragment must not be used
// to select a 256-bit aligned move. v16i16 and v32i8 use the 32-bit-element
// integer move.
3588 def : Pat<(alignedstore256 (v4f64 (extract_subvector
3589 (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
3590 (VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3591 def : Pat<(alignedstore256 (v8f32 (extract_subvector
3592 (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
3593 (VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3594 def : Pat<(alignedstore256 (v4i64 (extract_subvector
3595 (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
3596 (VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3597 def : Pat<(alignedstore256 (v8i32 (extract_subvector
3598 (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
3599 (VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3600 def : Pat<(alignedstore256 (v16i16 (extract_subvector
3601 (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
3602 (VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3603 def : Pat<(alignedstore256 (v32i8 (extract_subvector
3604 (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
3605 (VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
// Unaligned stores of the lower 256 bits of a 512-bit register: the low ymm
// subregister is stored with the 256-bit unaligned move of the matching
// domain. v16i16 and v32i8 use the 32-bit-element integer move.
3607 def : Pat<(store (v4f64 (extract_subvector
3608 (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
3609 (VMOVUPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3610 def : Pat<(store (v8f32 (extract_subvector
3611 (v16f32 VR512:$src), (iPTR 0))), addr:$dst),
3612 (VMOVUPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3613 def : Pat<(store (v4i64 (extract_subvector
3614 (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
3615 (VMOVDQU64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3616 def : Pat<(store (v8i32 (extract_subvector
3617 (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
3618 (VMOVDQU32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3619 def : Pat<(store (v16i16 (extract_subvector
3620 (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
3621 (VMOVDQU32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3622 def : Pat<(store (v32i8 (extract_subvector
3623 (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
3624 (VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
3628 // Move Int Doubleword to Packed Double Int
3630 let ExeDomain = SSEPackedInt in {
3631 def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
3632 "vmovd\t{$src, $dst|$dst, $src}",
3634 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
3636 def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
3637 "vmovd\t{$src, $dst|$dst, $src}",
3639 (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
3640 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
3641 def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
3642 "vmovq\t{$src, $dst|$dst, $src}",
3644 (v2i64 (scalar_to_vector GR64:$src)))],
3645 IIC_SSE_MOVDQ>, EVEX, VEX_W;
3646 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
3647 def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
3649 "vmovq\t{$src, $dst|$dst, $src}", []>,
3650 EVEX, VEX_W, EVEX_CD8<64, CD8VT1>;
// Codegen-only vmovq records that move 64 bits between the integer side
// (GR64 register or i64 memory) and the FR64X scalar register class; each
// pattern is a plain bitconvert in one direction or the other.
3651 let isCodeGenOnly = 1 in {
// GR64 -> FR64X.
3652 def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
3653 "vmovq\t{$src, $dst|$dst, $src}",
3654 [(set FR64X:$dst, (bitconvert GR64:$src))],
3655 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// i64 load -> FR64X. Unlike its neighbours this record uses the AVX512XSI
// class with opcode 0x7E.
3656 def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src),
3657 "vmovq\t{$src, $dst|$dst, $src}",
3658 [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>,
3659 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
// FR64X -> GR64.
3660 def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
3661 "vmovq\t{$src, $dst|$dst, $src}",
3662 [(set GR64:$dst, (bitconvert FR64X:$src))],
3663 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
// FR64X -> i64 store.
3664 def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
3665 "vmovq\t{$src, $dst|$dst, $src}",
3666 [(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
3667 IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
3668 EVEX_CD8<64, CD8VT1>;
3670 } // ExeDomain = SSEPackedInt
3672 // Move Int Doubleword to Single Scalar
// Codegen-only vmovd records that bitconvert a 32-bit integer, taken from a
// GR32 register (rr) or loaded from i32 memory (rm), into the FR32X scalar
// register class. Both use opcode 0x6E.
3674 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3675 def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
3676 "vmovd\t{$src, $dst|$dst, $src}",
3677 [(set FR32X:$dst, (bitconvert GR32:$src))],
3678 IIC_SSE_MOVDQ>, EVEX;
3680 def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
3681 "vmovd\t{$src, $dst|$dst, $src}",
3682 [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
3683 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
3684 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3686 // Move doubleword from xmm register to r/m32
3688 let ExeDomain = SSEPackedInt in {
3689 def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
3690 "vmovd\t{$src, $dst|$dst, $src}",
3691 [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
3692 (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
3694 def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3695 (ins i32mem:$dst, VR128X:$src),
3696 "vmovd\t{$src, $dst|$dst, $src}",
3697 [(store (i32 (extractelt (v4i32 VR128X:$src),
3698 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
3699 EVEX, EVEX_CD8<32, CD8VT1>;
3700 } // ExeDomain = SSEPackedInt
3702 // Move quadword from xmm1 register to r/m64
3704 let ExeDomain = SSEPackedInt in {
3705 def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
3706 "vmovq\t{$src, $dst|$dst, $src}",
3707 [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
3709 IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
3710 Requires<[HasAVX512, In64BitMode]>;
3712 let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
3713 def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
3714 "vmovq\t{$src, $dst|$dst, $src}",
3715 [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
3716 Requires<[HasAVX512, In64BitMode]>;
3718 def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
3719 (ins i64mem:$dst, VR128X:$src),
3720 "vmovq\t{$src, $dst|$dst, $src}",
3721 [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
3722 addr:$dst)], IIC_SSE_MOVDQ>,
3723 EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
3724 Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>;
3726 let hasSideEffects = 0 in
3727 def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
3729 "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
3731 } // ExeDomain = SSEPackedInt
3733 // Move Scalar Single to Double Int
3735 let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
3736 def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
3738 "vmovd\t{$src, $dst|$dst, $src}",
3739 [(set GR32:$dst, (bitconvert FR32X:$src))],
3740 IIC_SSE_MOVD_ToGP>, EVEX;
3741 def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
3742 (ins i32mem:$dst, FR32X:$src),
3743 "vmovd\t{$src, $dst|$dst, $src}",
3744 [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
3745 IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
3746 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
3748 // Move Quadword Int to Packed Quadword Int
3750 let ExeDomain = SSEPackedInt in {
3751 def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
3753 "vmovq\t{$src, $dst|$dst, $src}",
3755 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
3756 EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
3757 } // ExeDomain = SSEPackedInt
3759 //===----------------------------------------------------------------------===//
3760 // AVX-512 MOVSS, MOVSD
3761 //===----------------------------------------------------------------------===//
3763 multiclass avx512_move_scalar<string asm, SDNode OpNode,
3764 X86VectorVTInfo _> {
3765 def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3766 (ins _.RC:$src1, _.FRC:$src2),
3767 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
3768 [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
3769 (scalar_to_vector _.FRC:$src2))))],
3770 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
3771 def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3772 (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
3773 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
3774 "$dst {${mask}} {z}, $src1, $src2}"),
3775 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3776 (_.VT (OpNode _.RC:$src1,
3777 (scalar_to_vector _.FRC:$src2))),
3779 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
3780 let Constraints = "$src0 = $dst" in
3781 def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
3782 (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
3783 !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
3784 "$dst {${mask}}, $src1, $src2}"),
3785 [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
3786 (_.VT (OpNode _.RC:$src1,
3787 (scalar_to_vector _.FRC:$src2))),
3788 (_.VT _.RC:$src0))))],
3789 _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
3790 let canFoldAsLoad = 1, isReMaterializable = 1 in
3791 def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
3792 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3793 [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
3794 _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
3795 let mayLoad = 1, hasSideEffects = 0 in {
3796 let Constraints = "$src0 = $dst" in
3797 def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3798 (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
3799 !strconcat(asm, "\t{$src, $dst {${mask}}|",
3800 "$dst {${mask}}, $src}"),
3801 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
3802 def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
3803 (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
3804 !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
3805 "$dst {${mask}} {z}, $src}"),
3806 [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
3808 def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
3809 !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
3810 [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
3812 let mayStore = 1, hasSideEffects = 0 in
3813 def mrk: AVX512PI<0x11, MRMDestMem, (outs),
3814 (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
3815 !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
3816 [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
// Instantiate the scalar move records for single precision (vmovss, X86Movss,
// XS prefix, 32-bit CD8 element) and double precision (vmovsd, X86Movsd, XD
// prefix, VEX_W, 64-bit CD8 element). Both are marked VEX_LIG.
3819 defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
3820 VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
3822 defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
3823 VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Lowering patterns that select the masked register moves InstrStr#rrk and
// InstrStr#rrkz.
//   InstrStr - instruction name prefix; "rrk"/"rrkz" is appended and the result
//              is resolved with !cast<Instruction>.
//   OpNode   - outer node of the matched dag.
//   ZeroFP   - leaf matched as the select's false value in the second pattern.
//   _        - vector type info supplying VT, EltVT, RC and FRC.
// In both patterns the select condition is bit 0 of GR32:$mask (truncated to
// i8 and ANDed with 1), and the output copies GR32:$mask to VK1WM.
// NOTE(review): the final line of each pattern is absent from this excerpt.
3826 multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
3827 PatLeaf ZeroFP, X86VectorVTInfo _> {
// Select between $src1 and $src2: $src2 is copied to _.RC and passed as the
// first operand of the rrk form.
3829 def : Pat<(_.VT (OpNode _.RC:$src0,
3830 (_.VT (scalar_to_vector
3831 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3832 (_.EltVT _.FRC:$src1),
3833 (_.EltVT _.FRC:$src2))))))),
3834 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
3835 (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
3836 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3837 (_.VT _.RC:$src0), _.FRC:$src1),
// Select between $src1 and ZeroFP: use the rrkz form, which takes no
// pass-through operand.
3840 def : Pat<(_.VT (OpNode _.RC:$src0,
3841 (_.VT (scalar_to_vector
3842 (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
3843 (_.EltVT _.FRC:$src1),
3844 (_.EltVT ZeroFP))))))),
3845 (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
3846 (COPY_TO_REGCLASS GR32:$mask, VK1WM),
3847 (_.VT _.RC:$src0), _.FRC:$src1),
// Selects InstrStr#mrk for a masked_store whose stored value is a 128-bit
// vector inserted into undef 256-bit and then undef 512-bit vectors.
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - type info bundle supplying info128/info256/info512.
//   Mask     - dag matched as the mask operand of masked_store.
//   MaskRC   - register class of $mask; the register is copied to VK1WM.
// The stored register is copied from info128.RC to info128.FRC.
// NOTE(review): two lines of the matched dag are absent from this excerpt.
3851 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3852 dag Mask, RegisterClass MaskRC> {
3854 def : Pat<(masked_store addr:$dst, Mask,
3855 (_.info512.VT (insert_subvector undef,
3856 (_.info256.VT (insert_subvector undef,
3857 (_.info128.VT _.info128.RC:$src),
3860 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3861 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
3862 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Variant of avx512_store_scalar_lowering for mask registers narrower than
// 32 bits: $mask is first inserted at sub-register index `subreg` of an
// IMPLICIT_DEF i32 value, and that i32 is then copied to VK1WM.
//   InstrStr - instruction name prefix; "mrk" is appended.
//   _        - type info bundle supplying info128/info256/info512.
//   Mask     - dag matched as the mask operand of masked_store.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used by INSERT_SUBREG.
// NOTE(review): two lines of the matched dag are absent from this excerpt.
3866 multiclass avx512_store_scalar_lowering_subreg<string InstrStr,
3867 AVX512VLVectorVTInfo _,
3868 dag Mask, RegisterClass MaskRC,
3869 SubRegIndex subreg> {
3871 def : Pat<(masked_store addr:$dst, Mask,
3872 (_.info512.VT (insert_subvector undef,
3873 (_.info256.VT (insert_subvector undef,
3874 (_.info128.VT _.info128.RC:$src),
3877 (!cast<Instruction>(InstrStr#mrk) addr:$dst,
3878 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
3879 (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
// Selects the masked scalar loads for a 128-bit extract_subvector of a 512-bit
// masked_load.
//   InstrStr - instruction name prefix; "rmkz"/"rmk" is appended.
//   _        - type info bundle supplying info128/info256/info512.
//   Mask     - dag matched as the mask operand of masked_load.
//   MaskRC   - register class of $mask; the register is copied to VK1WM.
// NOTE(review): several lines of both patterns (including the address operand
// of the output instruction) are absent from this excerpt.
3883 multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
3884 dag Mask, RegisterClass MaskRC> {
// Pass-through is an all-zeros vector: use the zero-masking form rmkz.
3886 def : Pat<(_.info128.VT (extract_subvector
3887 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3888 (_.info512.VT (bitconvert
3889 (v16i32 immAllZerosV))))),
3891 (!cast<Instruction>(InstrStr#rmkz)
3892 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Pass-through is built from X86vzmovl of a 128-bit register: use the
// merge-masking form rmk with $src as its first operand.
3895 def : Pat<(_.info128.VT (extract_subvector
3896 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3897 (_.info512.VT (insert_subvector undef,
3898 (_.info256.VT (insert_subvector undef,
3899 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3903 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3904 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
// Variant of avx512_load_scalar_lowering for mask registers narrower than
// 32 bits: $mask is inserted at sub-register index `subreg` of an IMPLICIT_DEF
// i32 value before the copy to VK1WM.
//   InstrStr - instruction name prefix; "rmkz"/"rmk" is appended.
//   _        - type info bundle supplying info128/info256/info512.
//   Mask     - dag matched as the mask operand of masked_load.
//   MaskRC   - register class of $mask.
//   subreg   - sub-register index used by INSERT_SUBREG.
// NOTE(review): several lines of both patterns are absent from this excerpt.
3909 multiclass avx512_load_scalar_lowering_subreg<string InstrStr,
3910 AVX512VLVectorVTInfo _,
3911 dag Mask, RegisterClass MaskRC,
3912 SubRegIndex subreg> {
// All-zeros pass-through: zero-masking form rmkz.
3914 def : Pat<(_.info128.VT (extract_subvector
3915 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3916 (_.info512.VT (bitconvert
3917 (v16i32 immAllZerosV))))),
3919 (!cast<Instruction>(InstrStr#rmkz)
3920 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Pass-through built from X86vzmovl of a 128-bit register: merge-masking
// form rmk with $src as its first operand.
3923 def : Pat<(_.info128.VT (extract_subvector
3924 (_.info512.VT (masked_load addr:$srcAddr, Mask,
3925 (_.info512.VT (insert_subvector undef,
3926 (_.info256.VT (insert_subvector undef,
3927 (_.info128.VT (X86vzmovl _.info128.RC:$src)),
3931 (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
3932 (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
// Instantiate the scalar move/store/load lowering multiclasses.
// Register-move lowering for VMOVSSZ (fp32imm0 as zero) and VMOVSDZ (fp64imm0).
3937 defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
3938 defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
// Masked stores. Each Mask dag is a bitconvert of an integer ANDed with 1;
// the mask arrives in GR32 (truncated to i16), GR16 or GR8.
3940 defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3941 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3942 defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3943 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3944 defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3945 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// Masked loads, using the same three mask shapes as the stores above.
3947 defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
3948 (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
3949 defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
3950 (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16, sub_16bit>;
3951 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
3952 (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
// Scalar X86selects on a VK1WM mask is selected to the masked register move:
// $src2 (copied to VR128X) is the first operand, $src1 the last, and the
// middle vector operand is IMPLICIT_DEF. The result is copied back to the
// scalar FP register class.
3954 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
3955 (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
3956 VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
3958 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
3959 (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
3960 VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
// The mask_store_ss intrinsic with a GR8 mask is selected to VMOVSSZmrk; the
// mask is inserted at sub_8bit of an IMPLICIT_DEF i32 and copied to VK1WM.
3962 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
3963 (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
3964 (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
// "_REV" forms of the scalar register moves. Each uses opcode 0x11 with the
// MRMDestReg format, prints with a ".s" mnemonic suffix, has no selection
// pattern, and names its non-_REV counterpart through FoldGenData. The plain,
// merge-masked (EVEX_K, $src0 tied to $dst) and zero-masked (EVEX_KZ) forms
// are defined for both vmovss and vmovsd.
3966 let hasSideEffects = 0 in {
3967 def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3968 (ins VR128X:$src1, FR32X:$src2),
3969 "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3970 [], NoItinerary>, XS, EVEX_4V, VEX_LIG,
3971 FoldGenData<"VMOVSSZrr">;
3973 let Constraints = "$src0 = $dst" in
3974 def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3975 (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
3976 VR128X:$src1, FR32X:$src2),
3977 "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
3978 "$dst {${mask}}, $src1, $src2}",
3979 [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
3980 FoldGenData<"VMOVSSZrrk">;
3982 def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3983 (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
3984 "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
3985 "$dst {${mask}} {z}, $src1, $src2}",
3986 [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
3987 FoldGenData<"VMOVSSZrrkz">;
// Double-precision counterparts: XD prefix and VEX_W.
3989 def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3990 (ins VR128X:$src1, FR64X:$src2),
3991 "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
3992 [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
3993 FoldGenData<"VMOVSDZrr">;
3995 let Constraints = "$src0 = $dst" in
3996 def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
3997 (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
3998 VR128X:$src1, FR64X:$src2),
3999 "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
4000 "$dst {${mask}}, $src1, $src2}",
4001 [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
4002 VEX_W, FoldGenData<"VMOVSDZrrk">;
// NOTE(review): the line carrying the last input operand of this def is
// absent from this excerpt.
4004 def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
4005 (ins f64x_info.KRCWM:$mask, VR128X:$src1,
4007 "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
4008 "$dst {${mask}} {z}, $src1, $src2}",
4009 [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
4010 VEX_W, FoldGenData<"VMOVSDZrrkz">;
// Register-source X86vzmovl patterns, guarded by HasAVX512 and given
// AddedComplexity = 15. Each selects VMOVSSZrr/VMOVSDZrr with
// AVX512_128_SET0 as the first operand.
4013 let Predicates = [HasAVX512] in {
4014 let AddedComplexity = 15 in {
4015 // Move scalar to XMM zero-extended, zeroing a VR128X then do a
4016 // MOVS{S,D} to the lower bits.
4017 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32X:$src)))),
4018 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), FR32X:$src)>;
4019 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
4020 (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
4021 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
4022 (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
4023 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
4024 (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
// The 256-bit and 512-bit forms below operate on the sub_xmm part of the
// source and wrap the 128-bit result with SUBREG_TO_REG.
4027 // Move low f32 and clear high bits.
4028 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
4029 (SUBREG_TO_REG (i32 0),
4030 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4031 (EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
4032 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
4033 (SUBREG_TO_REG (i32 0),
4034 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4035 (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
4036 def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
4037 (SUBREG_TO_REG (i32 0),
4038 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4039 (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
4040 def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
4041 (SUBREG_TO_REG (i32 0),
4042 (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
4043 (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
// Zero-extending scalar load patterns (AddedComplexity = 20), followed by
// further register-source forms for the 256-bit and 512-bit types.
4045 let AddedComplexity = 20 in {
4046 // MOVSSrm zeros the high parts of the register; represent this
4047 // with COPY_TO_REGCLASS here (SUBREG_TO_REG for the wider types below). The AVX versions also write: DST[255:128] <- 0
4048 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
4049 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4050 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
4051 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4052 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
4053 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4054 def : Pat<(v4f32 (X86vzload addr:$src)),
4055 (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
4057 // MOVSDrm zeros the high parts of the register; represent this
4058 // with COPY_TO_REGCLASS here (SUBREG_TO_REG for the wider types below). The AVX versions also write: DST[255:128] <- 0
4059 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
4060 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4061 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
4062 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4063 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
4064 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4065 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
4066 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4067 def : Pat<(v2f64 (X86vzload addr:$src)),
4068 (COPY_TO_REGCLASS (VMOVSDZrm addr:$src), VR128X)>;
4070 // Represent the same patterns above but in the form they appear for 256-bit types.
4072 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4073 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4074 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4075 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4076 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4077 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4078 def : Pat<(v8f32 (X86vzload addr:$src)),
4079 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4080 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4081 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4082 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4083 def : Pat<(v4f64 (X86vzload addr:$src)),
4084 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4086 // Represent the same patterns above but in the form they appear for 512-bit types.
4088 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4089 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
4090 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4091 def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
4092 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
4093 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4094 def : Pat<(v16f32 (X86vzload addr:$src)),
4095 (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
4096 def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
4097 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
4098 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
4099 def : Pat<(v8f64 (X86vzload addr:$src)),
4100 (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
// 256-bit X86vzmovl of a scalar inserted at index 0: register sources go
// through VMOVSSZrr/VMOVSDZrr with a zeroed first operand; the i64 load form
// uses VMOVQI2PQIZrm.
4102 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
4103 (v4f32 (scalar_to_vector FR32X:$src)), (iPTR 0)))),
4104 (SUBREG_TO_REG (i32 0), (v4f32 (VMOVSSZrr (v4f32 (AVX512_128_SET0)),
4105 FR32X:$src)), sub_xmm)>;
4106 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
4107 (v2f64 (scalar_to_vector FR64X:$src)), (iPTR 0)))),
4108 (SUBREG_TO_REG (i64 0), (v2f64 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4109 FR64X:$src)), sub_xmm)>;
4110 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4111 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
4112 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4114 // Move low f64 and clear high bits.
4115 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256X:$src))),
4116 (SUBREG_TO_REG (i32 0),
4117 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4118 (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
4119 def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
4120 (SUBREG_TO_REG (i32 0),
4121 (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
4122 (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
// Same for the i64 element type.
4124 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
4125 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4126 (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
4127 def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
4128 (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (AVX512_128_SET0)),
4129 (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Store of element 0 and the X86Movss / X86Movsd / X86Movlpd / X86Movlps
// shuffle patterns, all selected to VMOVSSZmr, VMOVSSZrr or VMOVSDZrr.
// NOTE(review): several pattern lines (e.g. the store address operand and
// the trailing sub_xmm of the 256-bit forms) are absent from this excerpt.
4131 // Extract and store.
4132 def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
4134 (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>;
4136 // Shuffle with VMOVSS
4137 def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
4138 (VMOVSSZrr (v4i32 VR128X:$src1),
4139 (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
4140 def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
4141 (VMOVSSZrr (v4f32 VR128X:$src1),
4142 (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
// 256-bit X86Movss: operate on the sub_xmm parts of both sources.
4145 def : Pat<(v8i32 (X86Movss VR256X:$src1, VR256X:$src2)),
4146 (SUBREG_TO_REG (i32 0),
4147 (VMOVSSZrr (EXTRACT_SUBREG (v8i32 VR256X:$src1), sub_xmm),
4148 (EXTRACT_SUBREG (v8i32 VR256X:$src2), sub_xmm)),
4150 def : Pat<(v8f32 (X86Movss VR256X:$src1, VR256X:$src2)),
4151 (SUBREG_TO_REG (i32 0),
4152 (VMOVSSZrr (EXTRACT_SUBREG (v8f32 VR256X:$src1), sub_xmm),
4153 (EXTRACT_SUBREG (v8f32 VR256X:$src2), sub_xmm)),
4156 // Shuffle with VMOVSD
4157 def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4158 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
4159 def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
4160 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// 256-bit X86Movsd: operate on the sub_xmm parts of both sources.
4163 def : Pat<(v4i64 (X86Movsd VR256X:$src1, VR256X:$src2)),
4164 (SUBREG_TO_REG (i32 0),
4165 (VMOVSDZrr (EXTRACT_SUBREG (v4i64 VR256X:$src1), sub_xmm),
4166 (EXTRACT_SUBREG (v4i64 VR256X:$src2), sub_xmm)),
4168 def : Pat<(v4f64 (X86Movsd VR256X:$src1, VR256X:$src2)),
4169 (SUBREG_TO_REG (i32 0),
4170 (VMOVSDZrr (EXTRACT_SUBREG (v4f64 VR256X:$src1), sub_xmm),
4171 (EXTRACT_SUBREG (v4f64 VR256X:$src2), sub_xmm)),
// X86Movlpd and X86Movlps with register operands are also selected to
// VMOVSDZrr.
4174 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4175 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
4176 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
4177 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
4178 def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4179 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
4180 def : Pat<(v4i32 (X86Movlps VR128X:$src1, VR128X:$src2)),
4181 (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
// Register-to-register "vmovq" (opcode 0x7E, MRMSrcReg) whose pattern is
// X86vzmovl of a v2i64 register; defined with AddedComplexity = 15.
// NOTE(review): the line declaring the input operand list is absent from this
// excerpt.
4184 let AddedComplexity = 15 in
4185 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
4187 "vmovq\t{$src, $dst|$dst, $src}",
4188 [(set VR128X:$dst, (v2i64 (X86vzmovl
4189 (v2i64 VR128X:$src))))],
4190 IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
// Integer zero-extending move patterns, guarded by HasAVX512. GPR sources are
// selected to VMOVDI2PDIZrr / VMOV64toPQIZrr, memory sources to
// VMOVDI2PDIZrm / VMOVQI2PQIZrm; results wider than 128 bits are wrapped with
// SUBREG_TO_REG at sub_xmm.
4192 let Predicates = [HasAVX512] in {
4193 let AddedComplexity = 15 in {
4194 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
4195 (VMOVDI2PDIZrr GR32:$src)>;
4197 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
4198 (VMOV64toPQIZrr GR64:$src)>;
4200 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
4201 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4202 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4204 def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
4205 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
4206 (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
4208 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
4209 let AddedComplexity = 20 in {
4210 def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
4211 (VMOVDI2PDIZrm addr:$src)>;
4212 def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
4213 (VMOVDI2PDIZrm addr:$src)>;
4214 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
4215 (VMOVDI2PDIZrm addr:$src)>;
4216 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
4217 (VMOVDI2PDIZrm addr:$src)>;
4218 def : Pat<(v4i32 (X86vzload addr:$src)),
4219 (VMOVDI2PDIZrm addr:$src)>;
4220 def : Pat<(v8i32 (X86vzload addr:$src)),
4221 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4222 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
4223 (VMOVQI2PQIZrm addr:$src)>;
4224 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
4225 (VMOVZPQILo2PQIZrr VR128X:$src)>;
4226 def : Pat<(v2i64 (X86vzload addr:$src)),
4227 (VMOVQI2PQIZrm addr:$src)>;
4228 def : Pat<(v4i64 (X86vzload addr:$src)),
4229 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4232 // Use regular 128-bit instructions to match 256-bit and 512-bit scalar_to_vec+zext.
4233 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
4234 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4235 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4236 def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
4237 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
4238 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
4240 // Use regular 128-bit instructions to match 512-bit zero-extending loads (X86vzload).
4241 def : Pat<(v16i32 (X86vzload addr:$src)),
4242 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
4243 def : Pat<(v8i64 (X86vzload addr:$src)),
4244 (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
4246 //===----------------------------------------------------------------------===//
4247 // AVX-512 - Non-temporals
4248 //===----------------------------------------------------------------------===//
// "vmovntdqa" load instructions (opcode 0x2A, MRMSrcMem, SSEPackedInt domain)
// scheduled as WriteLoad. They carry no selection pattern here; separate Pat
// records select them. The 512-bit form is unconditional within this block;
// the 256-bit and 128-bit forms require HasVLX.
// NOTE(review): the input operand lines of the 256/128-bit defs are absent
// from this excerpt.
4249 let SchedRW = [WriteLoad] in {
4250 def VMOVNTDQAZrm : AVX512PI<0x2A, MRMSrcMem, (outs VR512:$dst),
4251 (ins i512mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}",
4252 [], SSEPackedInt>, EVEX, T8PD, EVEX_V512,
4253 EVEX_CD8<64, CD8VF>;
4255 let Predicates = [HasVLX] in {
4256 def VMOVNTDQAZ256rm : AVX512PI<0x2A, MRMSrcMem, (outs VR256X:$dst),
4258 "vmovntdqa\t{$src, $dst|$dst, $src}",
4259 [], SSEPackedInt>, EVEX, T8PD, EVEX_V256,
4260 EVEX_CD8<64, CD8VF>;
4262 def VMOVNTDQAZ128rm : AVX512PI<0x2A, MRMSrcMem, (outs VR128X:$dst),
4264 "vmovntdqa\t{$src, $dst|$dst, $src}",
4265 [], SSEPackedInt>, EVEX, T8PD, EVEX_V128,
4266 EVEX_CD8<64, CD8VF>;
// Defines the memory-destination form `mr` of a non-temporal store.
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   _         - vector type info supplying MemOp, RC, VT, ExeDomain, EltSize.
//   st_frag   - store fragment used in the pattern (default
//               alignednontemporalstore).
//   itin      - itinerary class (default IIC_SSE_MOVNT).
// The def is scheduled as WriteStore and given AddedComplexity = 400.
4270 multiclass avx512_movnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
4271 PatFrag st_frag = alignednontemporalstore,
4272 InstrItinClass itin = IIC_SSE_MOVNT> {
4273 let SchedRW = [WriteStore], AddedComplexity = 400 in
4274 def mr : AVX512PI<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src),
4275 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
4276 [(st_frag (_.VT _.RC:$src), addr:$dst)],
4277 _.ExeDomain, itin>, EVEX, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_movnt at all three vector lengths: Z (512-bit, requires
// HasAVX512) and Z256/Z128 (require HasAVX512 and HasVLX).
//   opc       - opcode byte.
//   OpcodeStr - mnemonic.
//   VTInfo    - bundle supplying info512/info256/info128.
4280 multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
4281 AVX512VLVectorVTInfo VTInfo> {
4282 let Predicates = [HasAVX512] in
4283 defm Z : avx512_movnt<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
4285 let Predicates = [HasAVX512, HasVLX] in {
4286 defm Z256 : avx512_movnt<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
4287 defm Z128 : avx512_movnt<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
// Non-temporal store instructions: vmovntdq (i64 vectors, opcode 0xE7),
// vmovntpd (f64 vectors, opcode 0x2B, VEX_W) and vmovntps (f32 vectors,
// opcode 0x2B).
4291 defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info>, PD;
4292 defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info>, PD, VEX_W;
4293 defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info>, PS;
// 512-bit non-temporal patterns (HasAVX512, AddedComplexity = 400).
// Stores of the remaining integer element types (i32/i16/i8) reuse VMOVNTDQZmr.
4295 let Predicates = [HasAVX512], AddedComplexity = 400 in {
4296 def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
4297 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4298 def : Pat<(alignednontemporalstore (v32i16 VR512:$src), addr:$dst),
4299 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
4300 def : Pat<(alignednontemporalstore (v64i8 VR512:$src), addr:$dst),
4301 (VMOVNTDQZmr addr:$dst, VR512:$src)>;
// Loads are all selected to VMOVNTDQAZrm; i32/i16/i8 vectors are matched as a
// bitconvert of a v8i64 load.
4303 def : Pat<(v8f64 (alignednontemporalload addr:$src)),
4304 (VMOVNTDQAZrm addr:$src)>;
4305 def : Pat<(v16f32 (alignednontemporalload addr:$src)),
4306 (VMOVNTDQAZrm addr:$src)>;
4307 def : Pat<(v8i64 (alignednontemporalload addr:$src)),
4308 (VMOVNTDQAZrm addr:$src)>;
4309 def : Pat<(v16i32 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
4310 (VMOVNTDQAZrm addr:$src)>;
4311 def : Pat<(v32i16 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
4312 (VMOVNTDQAZrm addr:$src)>;
4313 def : Pat<(v64i8 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
4314 (VMOVNTDQAZrm addr:$src)>;
// 256-bit and 128-bit non-temporal patterns (HasVLX, AddedComplexity = 400).
// 256-bit stores of i32/i16/i8 vectors reuse VMOVNTDQZ256mr.
4317 let Predicates = [HasVLX], AddedComplexity = 400 in {
4318 def : Pat<(alignednontemporalstore (v8i32 VR256X:$src), addr:$dst),
4319 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4320 def : Pat<(alignednontemporalstore (v16i16 VR256X:$src), addr:$dst),
4321 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
4322 def : Pat<(alignednontemporalstore (v32i8 VR256X:$src), addr:$dst),
4323 (VMOVNTDQZ256mr addr:$dst, VR256X:$src)>;
// 256-bit aligned non-temporal loads are all selected to VMOVNTDQAZ256rm.
// i32/i16/i8 vectors are matched as a bitconvert of a v4i64 load. The load
// type must be the 256-bit v4i64 (not the 128-bit v2i64) so that the source
// and result of the bitconvert have the same width; this mirrors the v8i64
// used by the 512-bit patterns and the v2i64 used by the 128-bit ones.
4325 def : Pat<(v4f64 (alignednontemporalload addr:$src)),
4326 (VMOVNTDQAZ256rm addr:$src)>;
4327 def : Pat<(v8f32 (alignednontemporalload addr:$src)),
4328 (VMOVNTDQAZ256rm addr:$src)>;
4329 def : Pat<(v4i64 (alignednontemporalload addr:$src)),
4330 (VMOVNTDQAZ256rm addr:$src)>;
4331 def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
4332 (VMOVNTDQAZ256rm addr:$src)>;
4333 def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
4334 (VMOVNTDQAZ256rm addr:$src)>;
4335 def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))),
4336 (VMOVNTDQAZ256rm addr:$src)>;
// 128-bit non-temporal patterns. Stores of i32/i16/i8 vectors reuse
// VMOVNTDQZ128mr.
4338 def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
4339 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4340 def : Pat<(alignednontemporalstore (v8i16 VR128X:$src), addr:$dst),
4341 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
4342 def : Pat<(alignednontemporalstore (v16i8 VR128X:$src), addr:$dst),
4343 (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>;
// Loads are all selected to VMOVNTDQAZ128rm; i32/i16/i8 vectors are matched as
// a bitconvert of a v2i64 load.
4345 def : Pat<(v2f64 (alignednontemporalload addr:$src)),
4346 (VMOVNTDQAZ128rm addr:$src)>;
4347 def : Pat<(v4f32 (alignednontemporalload addr:$src)),
4348 (VMOVNTDQAZ128rm addr:$src)>;
4349 def : Pat<(v2i64 (alignednontemporalload addr:$src)),
4350 (VMOVNTDQAZ128rm addr:$src)>;
4351 def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
4352 (VMOVNTDQAZ128rm addr:$src)>;
4353 def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
4354 (VMOVNTDQAZ128rm addr:$src)>;
4355 def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
4356 (VMOVNTDQAZ128rm addr:$src)>;
4359 //===----------------------------------------------------------------------===//
4360 // AVX-512 - Integer arithmetic
// Register-register (rr) and register-memory (rm) forms of a maskable binary
// integer operation, built on AVX512_maskable.
//   opc          - opcode byte.
//   OpcodeStr    - mnemonic.
//   OpNode       - SDNode applied to the two operands.
//   _            - vector type info (RC, MemOp, VT, LdFrag).
//   itins        - operand itineraries; the rr form uses itins.rr.
//   IsCommutable - forwarded to AVX512_maskable for the rr form (default 0).
// The rm form matches the memory operand as a bitconvert of _.LdFrag.
// NOTE(review): the itinerary line of the rm form is absent from this excerpt.
4362 multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4363 X86VectorVTInfo _, OpndItins itins,
4364 bit IsCommutable = 0> {
4365 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4366 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4367 "$src2, $src1", "$src1, $src2",
4368 (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
4369 itins.rr, IsCommutable>,
4370 AVX512BIBase, EVEX_4V;
4372 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4373 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4374 "$src2, $src1", "$src1, $src2",
4375 (_.VT (OpNode _.RC:$src1,
4376 (bitconvert (_.LdFrag addr:$src2)))),
4378 AVX512BIBase, EVEX_4V;
// Extends avx512_binop_rm (same parameters) with a broadcast-memory form
// `rmb`: the second operand is a scalar memory operand, printed with
// _.BroadcastStr appended, and the def is flagged EVEX_B.
// NOTE(review): part of the rmb pattern and its itinerary line are absent from
// this excerpt.
4381 multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4382 X86VectorVTInfo _, OpndItins itins,
4383 bit IsCommutable = 0> :
4384 avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
4385 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4386 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4387 "${src2}"##_.BroadcastStr##", $src1",
4388 "$src1, ${src2}"##_.BroadcastStr,
4389 (_.VT (OpNode _.RC:$src1,
4391 (_.ScalarLdFrag addr:$src2)))),
4393 AVX512BIBase, EVEX_4V, EVEX_B;
// Instantiates avx512_binop_rm at all three vector lengths. Z (512-bit)
// requires predicate `prd`; Z256 and Z128 require `prd` and HasVLX.
//   VTInfo - bundle supplying info512/info256/info128.
//   prd    - feature predicate gating all three lengths.
// The remaining parameters are forwarded unchanged to avx512_binop_rm.
4396 multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4397 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4398 Predicate prd, bit IsCommutable = 0> {
4399 let Predicates = [prd] in
4400 defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4401 IsCommutable>, EVEX_V512;
4403 let Predicates = [prd, HasVLX] in {
4404 defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4405 IsCommutable>, EVEX_V256;
4406 defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4407 IsCommutable>, EVEX_V128;
// Same as avx512_binop_rm_vl but instantiates avx512_binop_rmb, so each
// vector length also gets the broadcast form. Z requires `prd`; Z256 and Z128
// require `prd` and HasVLX.
4411 multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4412 AVX512VLVectorVTInfo VTInfo, OpndItins itins,
4413 Predicate prd, bit IsCommutable = 0> {
4414 let Predicates = [prd] in
4415 defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
4416 IsCommutable>, EVEX_V512;
4418 let Predicates = [prd, HasVLX] in {
4419 defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
4420 IsCommutable>, EVEX_V256;
4421 defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
4422 IsCommutable>, EVEX_V128;
// 64-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i64_info with
// VEX_W and EVEX_CD8<64, CD8VF>.
4426 multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4427 OpndItins itins, Predicate prd,
4428 bit IsCommutable = 0> {
4429 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4430 itins, prd, IsCommutable>,
4431 VEX_W, EVEX_CD8<64, CD8VF>;
// 32-bit-element wrapper: avx512_binop_rmb_vl over avx512vl_i32_info with
// EVEX_CD8<32, CD8VF>.
4434 multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4435 OpndItins itins, Predicate prd,
4436 bit IsCommutable = 0> {
4437 defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4438 itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
// 16-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i16_info with EVEX_CD8<16, CD8VF>.
4441 multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
4442 OpndItins itins, Predicate prd,
4443 bit IsCommutable = 0> {
4444 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
4445 itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>;
// 8-bit-element wrapper: avx512_binop_rm_vl (no broadcast form) over
// avx512vl_i8_info with EVEX_CD8<8, CD8VF>.
4448 multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
4449 OpndItins itins, Predicate prd,
4450 bit IsCommutable = 0> {
4451 defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
4452 itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>;
// Defines the Q (64-bit element, opc_q, mnemonic suffix "q") and D (32-bit
// element, opc_d, suffix "d") variants of one operation.
// NOTE(review): the trailing argument lines of both defm statements are
// absent from this excerpt.
4455 multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4456 SDNode OpNode, OpndItins itins, Predicate prd,
4457 bit IsCommutable = 0> {
4458 defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
4461 defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
// Defines the W (16-bit element, opc_w, mnemonic suffix "w") and B (8-bit
// element, opc_b, suffix "b") variants of one operation.
// NOTE(review): the trailing argument lines of both defm statements are
// absent from this excerpt.
4465 multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
4466 SDNode OpNode, OpndItins itins, Predicate prd,
4467 bit IsCommutable = 0> {
4468 defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd,
4471 defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd,
// Defines all four element widths of one operation: the D/Q variants are
// gated by HasAVX512 and the B/W variants by HasBWI. Opcodes are passed in
// b, w, d, q order.
4475 multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
4476 bits<8> opc_d, bits<8> opc_q,
4477 string OpcodeStr, SDNode OpNode,
4478 OpndItins itins, bit IsCommutable = 0> {
4479 defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
4480 itins, HasAVX512, IsCommutable>,
4481 avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
4482 itins, HasBWI, IsCommutable>;
// Maskable binary operation whose source and destination vector types may
// differ. Defines rr, rm and a broadcast form rmb.
//   opc, OpcodeStr - opcode byte and mnemonic.
//   itins          - operand itineraries; the rr form uses itins.rr.
//   OpNode         - SDNode applied to the two _Src-typed operands.
//   _Src           - type info of both inputs.
//   _Dst           - type info of the result (and of the mask, as it is the
//                    info passed to AVX512_maskable).
//   _Brdct         - type info of the broadcast element used by rmb; the
//                    broadcast vector is bitconverted before OpNode.
//   IsCommutable   - forwarded to AVX512_maskable for rr (default 0).
// NOTE(review): several lines (start of the rr pattern, the rm/rmb itinerary
// arguments, the rmb mnemonic argument) are absent from this excerpt.
4485 multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
4486 SDNode OpNode,X86VectorVTInfo _Src,
4487 X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct,
4488 bit IsCommutable = 0> {
4489 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4490 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4491 "$src2, $src1","$src1, $src2",
4493 (_Src.VT _Src.RC:$src1),
4494 (_Src.VT _Src.RC:$src2))),
4495 itins.rr, IsCommutable>,
4496 AVX512BIBase, EVEX_4V;
4497 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4498 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4499 "$src2, $src1", "$src1, $src2",
4500 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4501 (bitconvert (_Src.LdFrag addr:$src2)))),
4503 AVX512BIBase, EVEX_4V;
4505 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4506 (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
4508 "${src2}"##_Brdct.BroadcastStr##", $src1",
4509 "$src1, ${src2}"##_Brdct.BroadcastStr,
4510 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4511 (_Brdct.VT (X86VBroadcast
4512 (_Brdct.ScalarLdFrag addr:$src2)))))),
4514 AVX512BIBase, EVEX_4V, EVEX_B;
// Integer arithmetic instruction definitions. For the *_all form the opcodes
// are listed in b, w, d, q order; for *_bw in b, w order. The final 1/0
// argument (where visible) is IsCommutable.
// NOTE(review): the multiply family mixes SSE_INTALU_ITINS_P (VPMULLD/W/Q,
// VPMULHW) and SSE_INTMUL_ITINS_P (VPMULHUW, VPMULHRSW) - confirm whether the
// ALU itineraries are intended. The continuation lines of VPMULHW, VPMULHUW
// and VPMULHRSW are absent from this excerpt.
4517 defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
4518 SSE_INTALU_ITINS_P, 1>;
4519 defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
4520 SSE_INTALU_ITINS_P, 0>;
4521 defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
4522 SSE_INTALU_ITINS_P, HasBWI, 1>;
4523 defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
4524 SSE_INTALU_ITINS_P, HasBWI, 0>;
4525 defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
4526 SSE_INTALU_ITINS_P, HasBWI, 1>;
4527 defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
4528 SSE_INTALU_ITINS_P, HasBWI, 0>;
4529 defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
4530 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4531 defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul,
4532 SSE_INTALU_ITINS_P, HasBWI, 1>;
4533 defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul,
4534 SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
4535 defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P,
4537 defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P,
4539 defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P,
4541 defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg,
4542 SSE_INTALU_ITINS_P, HasBWI, 1>;
// Instantiates avx512_binop_rm2 at all three vector lengths with separate
// source and destination type bundles. The broadcast type is fixed to 64-bit
// integer elements (v8i64_info / v4i64x_info / v2i64x_info), and every length
// carries EVEX_CD8<64, CD8VF> and VEX_W.
//   _SrcVTInfo, _DstVTInfo - type bundles for the inputs and the result.
//   prd                    - feature predicate; Z256/Z128 also need HasVLX.
4544 multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins,
4545 AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo,
4546 SDNode OpNode, Predicate prd, bit IsCommutable = 0> {
4547 let Predicates = [prd] in
4548 defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4549 _SrcVTInfo.info512, _DstVTInfo.info512,
4550 v8i64_info, IsCommutable>,
4551 EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
4552 let Predicates = [HasVLX, prd] in {
4553 defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4554 _SrcVTInfo.info256, _DstVTInfo.info256,
4555 v4i64x_info, IsCommutable>,
4556 EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
4557 defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode,
4558 _SrcVTInfo.info128, _DstVTInfo.info128,
4559 v2i64x_info, IsCommutable>,
4560 EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
// Operations built with avx512_binop_all. VPMULDQ and VPMULUDQ take i32
// vectors and produce i64 vectors (commutable); VPMULTISHIFTQB takes and
// produces i8 vectors, is gated by HasVBMI and is not commutable.
// NOTE(review): VPMULDQ uses SSE_INTALU_ITINS_P while VPMULUDQ uses
// SSE_INTMUL_ITINS_P - confirm the difference is intended.
4564 defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
4565 avx512vl_i32_info, avx512vl_i64_info,
4566 X86pmuldq, HasAVX512, 1>,T8PD;
4567 defm VPMULUDQ : avx512_binop_all<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
4568 avx512vl_i32_info, avx512vl_i64_info,
4569 X86pmuludq, HasAVX512, 1>;
4570 defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P,
4571 avx512vl_i8_info, avx512vl_i8_info,
4572 X86multishift, HasVBMI, 0>, T8PD;
// Broadcast-memory form `rmb` of a pack-style operation: the second operand is
// a scalar of the source element type, broadcast with X86VBroadcast to
// _Src.VT and bitconverted before OpNode. Flagged EVEX_B, with
// EVEX_CD8 derived from _Src.EltSize.
//   _Src - type info of the inputs.
//   _Dst - type info of the result.
// NOTE(review): the mnemonic argument line is absent from this excerpt.
4574 multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4575 X86VectorVTInfo _Src, X86VectorVTInfo _Dst> {
4576 defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4577 (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2),
4579 "${src2}"##_Src.BroadcastStr##", $src1",
4580 "$src1, ${src2}"##_Src.BroadcastStr,
4581 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
4582 (_Src.VT (X86VBroadcast
4583 (_Src.ScalarLdFrag addr:$src2))))))>,
4584 EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>;
// Register-register (rr) and register-memory (rm) forms of a pack-style
// operation with distinct source and destination vector types.
//   _Src         - type info of both inputs; also supplies EltSize for
//                  EVEX_CD8.
//   _Dst         - type info of the result.
//   IsCommutable - forwarded to AVX512_maskable for rr (default 0).
// The rr form uses NoItinerary; the rm form passes no itinerary argument.
// NOTE(review): the first line of the rr pattern is absent from this excerpt.
4587 multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
4588 SDNode OpNode,X86VectorVTInfo _Src,
4589 X86VectorVTInfo _Dst, bit IsCommutable = 0> {
4590 defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
4591 (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
4592 "$src2, $src1","$src1, $src2",
4594 (_Src.VT _Src.RC:$src1),
4595 (_Src.VT _Src.RC:$src2))),
4596 NoItinerary, IsCommutable>,
4597 EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
4598 defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
4599 (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
4600 "$src2, $src1", "$src1, $src2",
4601 (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
4602 (bitconvert (_Src.LdFrag addr:$src2))))>,
4603 EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>;
// Instantiates the i32-source / i16-result forms at all three vector widths.
// Each width combines avx512_packs_rm with the broadcast form
// avx512_packs_rmb. The 512-bit defm requires HasBWI; the 256- and 128-bit
// defms additionally require HasVLX.
4606 multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr,
4608 let Predicates = [HasBWI] in
4609 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info,
4611 avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info,
4612 v32i16_info>, EVEX_V512;
4613 let Predicates = [HasBWI, HasVLX] in {
4614 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info,
4616 avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info,
4617 v16i16x_info>, EVEX_V256;
4618 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info,
4620 avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info,
4621 v8i16x_info>, EVEX_V128;
// Instantiates the i16-source / i8-result forms at 512 bits (HasBWI) and at
// 256/128 bits (HasBWI + HasVLX). Only avx512_packs_rm is used here, so no
// broadcast ("rmb") form is produced for these element sizes.
4624 multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
4626 let Predicates = [HasBWI] in
4627 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info,
4628 v64i8_info>, EVEX_V512;
4629 let Predicates = [HasBWI, HasVLX] in {
4630 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
4631 v32i8x_info>, EVEX_V256;
4632 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
4633 v16i8x_info>, EVEX_V128;
// Reuses avx512_packs_rm (rr + rm forms with distinct source and result
// types) for the multiply-add instructions. _Src/_Dst are width families; the
// matching info512/info256/info128 member is selected per defm.
// 512-bit requires HasBWI; 256/128-bit additionally require HasVLX.
4637 multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
4638 SDNode OpNode, AVX512VLVectorVTInfo _Src,
4639 AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
4640 let Predicates = [HasBWI] in
4641 defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
4642 _Dst.info512, IsCommutable>, EVEX_V512;
4643 let Predicates = [HasBWI, HasVLX] in {
4644 defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
4645 _Dst.info256, IsCommutable>, EVEX_V256;
4646 defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
4647 _Dst.info128, IsCommutable>, EVEX_V128;
// Pack instructions: the *SS* forms use X86Packss, the *US* forms X86Packus.
// NOTE(review): VPACKUSDW is the only one deriving from AVX5128IBase (digit
// '8') rather than AVX512BIBase (letter 'B'). This looks deliberate rather
// than a typo - presumably it selects a different opcode map for 0x2b -
// confirm against the class definitions in X86InstrFormats.td.
4651 defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, AVX512BIBase;
4652 defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, AVX5128IBase;
4653 defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase;
4654 defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
// Multiply-add instructions. VPMADDUBSW maps i8 sources to i16 results and
// leaves IsCommutable at its default of 0; VPMADDWD maps i16 sources to i32
// results and passes IsCommutable = 1.
4656 defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
4657 avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
4658 defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
4659 avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase;
// Integer max/min, selected from the generic smax/umax/smin/umin nodes.
// Byte (_b) and word (_w) forms are predicated on HasBWI; the combined
// dword/qword (_dq) forms are predicated on HasAVX512 and take two opcode
// arguments, for which the same value is passed in every case below.
4661 defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
4662 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4663 defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax,
4664 SSE_INTALU_ITINS_P, HasBWI, 1>;
4665 defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
4666 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4668 defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax,
4669 SSE_INTALU_ITINS_P, HasBWI, 1>;
4670 defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax,
4671 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4672 defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
4673 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4675 defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin,
4676 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4677 defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin,
4678 SSE_INTALU_ITINS_P, HasBWI, 1>;
4679 defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
4680 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4682 defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin,
4683 SSE_INTALU_ITINS_P, HasBWI, 1>;
4684 defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
4685 SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
4686 defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
4687 SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
4689 // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX.
// Applies when DQI is available but VLX is not. Each v4i64 / v2i64 `mul`
// operand is placed into an otherwise undefined v8i64 register with
// INSERT_SUBREG (sub_ymm for 256-bit sources, sub_xmm for 128-bit sources).
4690 let Predicates = [HasDQI, NoVLX] in {
4691 def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
4694 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
4695 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
4698 def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
4701 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
4702 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
4706 //===----------------------------------------------------------------------===//
4707 // AVX-512 Logical Instructions
4708 //===----------------------------------------------------------------------===//
// Register-register ("rr") and register-memory ("rm") forms of an integer
// logic operation. AVX512_maskable_logic is handed two DAG patterns per form:
//   1. the operation typed as _.i64VT with both operands bitconverted, and
//   2. the i64VT result of OpNode bitconverted back to _.VT.
// The rr form forwards IsCommutable together with IIC_SSE_BIT_P_RR.
4710 multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
4711 X86VectorVTInfo _, bit IsCommutable = 0> {
4712 defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
4713 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4714 "$src2, $src1", "$src1, $src2",
4715 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4716 (bitconvert (_.VT _.RC:$src2)))),
4717 (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
4719 IIC_SSE_BIT_P_RR, IsCommutable>,
4720 AVX512BIBase, EVEX_4V;
4722 defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4723 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
4724 "$src2, $src1", "$src1, $src2",
4725 (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
4726 (bitconvert (_.LdFrag addr:$src2)))),
4727 (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
4728 (bitconvert (_.LdFrag addr:$src2)))))),
4730 AVX512BIBase, EVEX_4V;
// Extends avx512_logic_rm (inherited, so rr and rm are also produced) with a
// broadcast-memory ("rmb") form: $src2 is a scalar memory operand loaded via
// _.ScalarLdFrag and wrapped in X86VBroadcast. As in the base multiclass, two
// patterns are supplied - one typed _.i64VT and one bitconverted to _.VT.
4733 multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
4734 X86VectorVTInfo _, bit IsCommutable = 0> :
4735 avx512_logic_rm<opc, OpcodeStr, OpNode, _, IsCommutable> {
4736 defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
4737 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
4738 "${src2}"##_.BroadcastStr##", $src1",
4739 "$src1, ${src2}"##_.BroadcastStr,
4740 (_.i64VT (OpNode _.RC:$src1,
4742 (_.VT (X86VBroadcast
4743 (_.ScalarLdFrag addr:$src2)))))),
4744 (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
4746 (_.VT (X86VBroadcast
4747 (_.ScalarLdFrag addr:$src2)))))))),
4749 AVX512BIBase, EVEX_4V, EVEX_B;
// Instantiates avx512_logic_rmb for each vector width of VTInfo:
// Z (512-bit) under HasAVX512, Z256 and Z128 under HasAVX512 + HasVLX.
4752 multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
4753 AVX512VLVectorVTInfo VTInfo,
4754 bit IsCommutable = 0> {
4755 let Predicates = [HasAVX512] in
4756 defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
4757 IsCommutable>, EVEX_V512;
4759 let Predicates = [HasAVX512, HasVLX] in {
4760 defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
4761 IsCommutable>, EVEX_V256;
4762 defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
4763 IsCommutable>, EVEX_V128;
// Dword flavour of a logic op: all widths of avx512vl_i32_info, tagged with
// EVEX_CD8<32, CD8VF>.
4767 multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
4768 bit IsCommutable = 0> {
4769 defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
4770 IsCommutable>, EVEX_CD8<32, CD8VF>;
// Qword flavour of a logic op: all widths of avx512vl_i64_info, tagged with
// VEX_W and EVEX_CD8<64, CD8VF>.
4773 multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
4774 bit IsCommutable = 0> {
4775 defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
4777 VEX_W, EVEX_CD8<64, CD8VF>;
// Produces both element-size flavours of a logic op: "Q" (mnemonic suffix
// "q", opcode opc_q) and "D" (mnemonic suffix "d", opcode opc_d).
4780 multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
4781 SDNode OpNode, bit IsCommutable = 0> {
4782 defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, IsCommutable>;
4783 defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, IsCommutable>;
// Integer logic instructions. and/or/xor are marked commutable (trailing 1);
// VPANDN uses X86andnp and keeps the default IsCommutable = 0.
4786 defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>;
4787 defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, 1>;
4788 defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, 1>;
4789 defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp>;
4791 //===----------------------------------------------------------------------===//
4792 // AVX-512 FP arithmetic
4793 //===----------------------------------------------------------------------===//
// Scalar FP binop in four forms:
//   rr_Int / rm_Int - operate on the vector register class (_.RC) and match
//                     VecNode with an (i32 FROUND_CURRENT) operand; the memory
//                     form matches $src2 through _.ScalarIntMemCPat.
//   rr / rm         - isCodeGenOnly defs on the scalar register class
//                     (_.FRC) matching plain OpNode, predicated on HasAVX512.
// The rr def takes its isCommutable value from IsCommutable.
4794 multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4795 SDNode OpNode, SDNode VecNode, OpndItins itins,
4797 let ExeDomain = _.ExeDomain in {
4798 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4799 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4800 "$src2, $src1", "$src1, $src2",
4801 (_.VT (VecNode _.RC:$src1, _.RC:$src2,
4802 (i32 FROUND_CURRENT))),
4805 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4806 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4807 "$src2, $src1", "$src1, $src2",
4808 (_.VT (VecNode _.RC:$src1,
4809 _.ScalarIntMemCPat:$src2,
4810 (i32 FROUND_CURRENT))),
4812 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4813 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4814 (ins _.FRC:$src1, _.FRC:$src2),
4815 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4816 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4818 let isCommutable = IsCommutable;
4820 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4821 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4822 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4823 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4824 (_.ScalarLdFrag addr:$src2)))], itins.rm>;
// Register-register scalar form with an explicit rounding-control operand
// ("rrb"): takes an extra AVX512RC:$rc input and matches VecNode with
// (i32 imm:$rc) as its third operand.
4829 multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4830 SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
4831 let ExeDomain = _.ExeDomain in
4832 defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4833 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
4834 "$rc, $src2, $src1", "$src1, $src2, $rc",
4835 (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4836 (i32 imm:$rc)), itins.rr, IsCommutable>,
// Scalar FP binop with a {sae} variant. Compared with avx512_fp_scalar:
//   - rr_Int / rm_Int match VecNode with two operands (no rounding operand);
//   - rr / rm are the same isCodeGenOnly _.FRC defs matching OpNode;
//   - rrb prints "{sae}" and matches SaeNode with (i32 FROUND_NO_EXC),
//     encoded with EVEX_B.
// IsCommutable has no default here, so every user must pass it explicitly.
4839 multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
4840 SDNode OpNode, SDNode VecNode, SDNode SaeNode,
4841 OpndItins itins, bit IsCommutable> {
4842 let ExeDomain = _.ExeDomain in {
4843 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4844 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4845 "$src2, $src1", "$src1, $src2",
4846 (_.VT (VecNode _.RC:$src1, _.RC:$src2)),
4849 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
4850 (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
4851 "$src2, $src1", "$src1, $src2",
4852 (_.VT (VecNode _.RC:$src1,
4853 _.ScalarIntMemCPat:$src2)),
4856 let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
4857 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4858 (ins _.FRC:$src1, _.FRC:$src2),
4859 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4860 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4862 let isCommutable = IsCommutable;
4864 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4865 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4866 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4867 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4868 (_.ScalarLdFrag addr:$src2)))], itins.rm>;
4871 defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
4872 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
4873 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4874 (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
4875 (i32 FROUND_NO_EXC))>, EVEX_B;
// Single ("ss", f32x_info, XS) and double ("sd", f64x_info, XD + VEX_W)
// precision scalar binops. Each defm combines avx512_fp_scalar with the
// rounding-control form avx512_fp_scalar_round.
4879 multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
4881 SizeItins itins, bit IsCommutable> {
4882 defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
4883 itins.s, IsCommutable>,
4884 avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode,
4885 itins.s, IsCommutable>,
4886 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4887 defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
4888 itins.d, IsCommutable>,
4889 avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode,
4890 itins.d, IsCommutable>,
4891 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Single ("ss") and double ("sd") precision scalar binops built on
// avx512_fp_scalar_sae, i.e. with a {sae} register form instead of a
// rounding-control form.
4894 multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
4895 SDNode VecNode, SDNode SaeNode,
4896 SizeItins itins, bit IsCommutable> {
4897 defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode,
4898 VecNode, SaeNode, itins.s, IsCommutable>,
4899 XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
4900 defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
4901 VecNode, SaeNode, itins.d, IsCommutable>,
4902 XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
// Scalar FP arithmetic. add/mul pass IsCommutable = 1, sub/div pass 0.
// min/max use the {sae} multiclass and are declared non-commutable here.
4904 defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>;
4905 defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>;
4906 defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>;
4907 defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>;
4908 defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds,
4909 SSE_ALU_ITINS_S, 0>;
4910 defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds,
4911 SSE_ALU_ITINS_S, 0>;
4913 // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use
4914 // X86fminc and X86fmaxc instead of X86fmin and X86fmax
// Emits isCodeGenOnly rr/rm defs on the scalar register class (_.FRC),
// predicated on HasAVX512. The rr def is unconditionally marked
// isCommutable = 1; the rm def loads $src2 via _.ScalarLdFrag.
4915 multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
4916 X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
4917 let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
4918 def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
4919 (ins _.FRC:$src1, _.FRC:$src2),
4920 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4921 [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
4923 let isCommutable = 1;
4925 def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
4926 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
4927 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
4928 [(set _.FRC:$dst, (OpNode _.FRC:$src1,
4929 (_.ScalarLdFrag addr:$src2)))], itins.rm>;
// Commutable scalar min/max variants, matching X86fminc / X86fmaxc. They
// reuse the vminss/vminsd/vmaxss/vmaxsd mnemonics and opcodes 0x5D / 0x5F.
4932 defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
4933 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4934 EVEX_CD8<32, CD8VT1>;
4936 defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
4937 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4938 EVEX_CD8<64, CD8VT1>;
4940 defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
4941 SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG,
4942 EVEX_CD8<32, CD8VT1>;
4944 defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
4945 SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
4946 EVEX_CD8<64, CD8VT1>;
// Packed FP binop in rr, rm and broadcast-memory (rmb) forms. The mnemonic is
// OpcodeStr with _.Suffix appended. All forms are declared with
// hasSideEffects = 0; rm and rmb are additionally wrapped in mayLoad = 1.
// OpNode is an SDPatternOperator, so a null_frag may be passed to define the
// instructions without a selection pattern.
4948 multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4949 X86VectorVTInfo _, OpndItins itins,
4951 let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
4952 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4953 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4954 "$src2, $src1", "$src1, $src2",
4955 (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
4956 IsCommutable>, EVEX_4V;
4957 let mayLoad = 1 in {
4958 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4959 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
4960 "$src2, $src1", "$src1, $src2",
4961 (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
4963 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
4964 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
4965 "${src2}"##_.BroadcastStr##", $src1",
4966 "$src1, ${src2}"##_.BroadcastStr,
4967 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
4968 (_.ScalarLdFrag addr:$src2)))),
4969 itins.rm>, EVEX_4V, EVEX_B;
// Packed register-register form with explicit rounding control ("rb"):
// takes an extra AVX512RC:$rc operand, matches OpNodeRnd with (i32 imm:$rc),
// and is encoded with EVEX_B and EVEX_RC.
4974 multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4975 X86VectorVTInfo _> {
4976 let ExeDomain = _.ExeDomain in
4977 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4978 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
4979 "$rc, $src2, $src1", "$src1, $src2, $rc",
4980 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>,
4981 EVEX_4V, EVEX_B, EVEX_RC;
// Packed register-register {sae} form ("rb"): no rounding operand is taken;
// the pattern passes the constant (i32 FROUND_NO_EXC) to OpNodeRnd and the
// assembly string prints "{sae}".
4985 multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
4986 X86VectorVTInfo _> {
4987 let ExeDomain = _.ExeDomain in
4988 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
4989 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
4990 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
4991 (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>,
// Instantiates avx512_fp_packed for single (PS) and double (PD, VEX_W)
// precision at every vector width. The 512-bit defms require `prd`; the
// 128- and 256-bit defms require `prd` together with HasVLX.
4995 multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
4996 Predicate prd, SizeItins itins,
4997 bit IsCommutable = 0> {
4998 let Predicates = [prd] in {
4999 defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
5000 itins.s, IsCommutable>, EVEX_V512, PS,
5001 EVEX_CD8<32, CD8VF>;
5002 defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
5003 itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
5004 EVEX_CD8<64, CD8VF>;
5007 // Define only if AVX512VL feature is present.
5008 let Predicates = [prd, HasVLX] in {
5009 defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
5010 itins.s, IsCommutable>, EVEX_V128, PS,
5011 EVEX_CD8<32, CD8VF>;
5012 defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
5013 itins.s, IsCommutable>, EVEX_V256, PS,
5014 EVEX_CD8<32, CD8VF>;
5015 defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
5016 itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
5017 EVEX_CD8<64, CD8VF>;
5018 defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
5019 itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
5020 EVEX_CD8<64, CD8VF>;
// Rounding-control packed forms. Only the 512-bit PS (v16f32) and PD (v8f64)
// variants are instantiated; no 128/256-bit defms appear here.
5024 multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
5025 defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
5026 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5027 defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
5028 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// {sae} packed forms. Only the 512-bit PS (v16f32) and PD (v8f64) variants
// are instantiated; no 128/256-bit defms appear here.
5031 multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd> {
5032 defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
5033 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
5034 defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, v8f64_info>,
5035 EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
// Packed FP arithmetic. add/mul/sub/div pair the plain forms with the
// rounding-control forms; min/max pair them with the {sae} forms.
// add and mul are commutable (1); sub and div use the default (0);
// min and max pass 0 explicitly.
5038 defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
5039 SSE_ALU_ITINS_P, 1>,
5040 avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
5041 defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
5042 SSE_MUL_ITINS_P, 1>,
5043 avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
5044 defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
5045 avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
5046 defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
5047 avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
5048 defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
5049 SSE_ALU_ITINS_P, 0>,
5050 avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
5051 defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
5052 SSE_ALU_ITINS_P, 0>,
5053 avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
// Commutable packed min/max (X86fminc / X86fmaxc). They share the "vmin" /
// "vmax" mnemonics and opcodes with VMIN/VMAX, so they are isCodeGenOnly.
5054 let isCodeGenOnly = 1 in {
5055 defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
5056 SSE_ALU_ITINS_P, 1>;
5057 defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
5058 SSE_ALU_ITINS_P, 1>;
// Packed FP logic instructions, predicated on HasDQI. null_frag is passed as
// the operator, so these defms carry no selection pattern of their own.
// VANDN is the only one declared non-commutable.
5060 defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
5061 SSE_ALU_ITINS_P, 1>;
5062 defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
5063 SSE_ALU_ITINS_P, 0>;
5064 defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
5065 SSE_ALU_ITINS_P, 1>;
5066 defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
5067 SSE_ALU_ITINS_P, 1>;
5069 // Patterns catch floating point selects with bitcasted integer logic ops.
// InstrStr names an already-defined instruction family; each pattern selects
// one of its suffixed variants via !cast<Instruction>:
//   rrk / rrkz   - masked register-register
//   rmk / rmkz   - masked register-memory
//   rmb          - unmasked register-broadcast
//   rmbk / rmbkz - masked register-broadcast
// The logic op itself is typed as _.i64VT and bitconverted to the FP type
// _.VT before feeding the vselect on the _.KRCWM mask.
5070 multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
5071 X86VectorVTInfo _, Predicate prd> {
5072 let Predicates = [prd] in {
5073 // Masked register-register logical operations.
5074 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5075 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5077 (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
5078 _.RC:$src1, _.RC:$src2)>;
5079 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5080 (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
5082 (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
5084 // Masked register-memory logical operations.
5085 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5086 (bitconvert (_.i64VT (OpNode _.RC:$src1,
5087 (load addr:$src2)))),
5089 (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
5090 _.RC:$src1, addr:$src2)>;
5091 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5092 (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
5094 (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
5096 // Register-broadcast logical operations.
5097 def : Pat<(_.i64VT (OpNode _.RC:$src1,
5098 (bitconvert (_.VT (X86VBroadcast
5099 (_.ScalarLdFrag addr:$src2)))))),
5100 (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
5101 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5103 (_.i64VT (OpNode _.RC:$src1,
5106 (_.ScalarLdFrag addr:$src2))))))),
5108 (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
5109 _.RC:$src1, addr:$src2)>;
5110 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5112 (_.i64VT (OpNode _.RC:$src1,
5115 (_.ScalarLdFrag addr:$src2))))))),
5117 (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
5118 _.RC:$src1, addr:$src2)>;
// Applies avx512_fp_logical_lowering to every FP vector type. f32 vectors map
// to the "D" instruction flavour and f64 vectors to the "Q" flavour. The
// 128/256-bit cases are predicated on HasVLX, the 512-bit ones on HasAVX512.
5122 multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
5123 defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
5124 defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
5125 defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
5126 defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
5127 defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
5128 defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
// Route FP-typed masked/broadcast logic through the integer VPAND / VPOR /
// VPXOR / VPANDN instruction families.
5131 defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
5132 defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
5133 defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
5134 defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
// Scalar f64/f32 logic nodes (X86fand, X86for, X86fxor, X86fandn) are
// selected to the 128-bit packed instructions. Both operands are copied from
// the scalar class (FR64X / FR32X) to VR128X, and the result is copied back.
// Requires both VLX and DQI.
5136 let Predicates = [HasVLX,HasDQI] in {
5137 // Use packed logical operations for scalar ops.
5138 def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
5139 (COPY_TO_REGCLASS (VANDPDZ128rr
5140 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5141 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5142 def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
5143 (COPY_TO_REGCLASS (VORPDZ128rr
5144 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5145 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5146 def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
5147 (COPY_TO_REGCLASS (VXORPDZ128rr
5148 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5149 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5150 def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
5151 (COPY_TO_REGCLASS (VANDNPDZ128rr
5152 (COPY_TO_REGCLASS FR64X:$src1, VR128X),
5153 (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
5155 def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
5156 (COPY_TO_REGCLASS (VANDPSZ128rr
5157 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5158 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5159 def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
5160 (COPY_TO_REGCLASS (VORPSZ128rr
5161 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5162 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5163 def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
5164 (COPY_TO_REGCLASS (VXORPSZ128rr
5165 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5166 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
5167 def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
5168 (COPY_TO_REGCLASS (VANDNPSZ128rr
5169 (COPY_TO_REGCLASS FR32X:$src1, VR128X),
5170 (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
// Packed scalef in rr, rm and broadcast-memory (rmb) forms. Every pattern
// passes (i32 FROUND_CURRENT) as the third operand of OpNode.
5173 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
5174 X86VectorVTInfo _> {
5175 let ExeDomain = _.ExeDomain in {
5176 defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5177 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5178 "$src2, $src1", "$src1, $src2",
5179 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, EVEX_4V;
5180 defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5181 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
5182 "$src2, $src1", "$src1, $src2",
5183 (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, EVEX_4V;
5184 defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5185 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5186 "${src2}"##_.BroadcastStr##", $src1",
5187 "$src1, ${src2}"##_.BroadcastStr,
5188 (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5189 (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>,
// Scalar scalef in rr and rm forms, both with (i32 FROUND_CURRENT). The
// memory form turns the scalar load into a vector with scalar_to_vector.
5194 multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
5195 X86VectorVTInfo _> {
5196 let ExeDomain = _.ExeDomain in {
5197 defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
5198 (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
5199 "$src2, $src1", "$src1, $src2",
5200 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>;
5201 defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
5202 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
5203 "$src2, $src1", "$src1, $src2",
5205 (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
5206 (i32 FROUND_CURRENT))>;
// Instantiates every scalef form. `opc` is used for the packed instructions
// and `opcScaler` for the scalar ones (SSZ128 / SDZ128).
// The 512-bit packed and the scalar defms also pull in the rounding-control
// multiclasses; the 128/256-bit packed defms (HasVLX) do not.
// The scalar rounding forms append "ss"/"sd" to OpcodeStr explicitly, while
// avx512_fp_scalef_scalar derives its suffix from the type info.
5210 multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {
5211 defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,
5212 avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,
5213 EVEX_V512, EVEX_CD8<32, CD8VF>;
5214 defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,
5215 avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,
5216 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
5217 defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f32x_info>,
5218 avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,
5219 EVEX_4V,EVEX_CD8<32, CD8VT1>;
5220 defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f64x_info>,
5221 avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,
5222 EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
5224 // Define only if AVX512VL feature is present.
5225 let Predicates = [HasVLX] in {
5226 defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f32x_info>,
5227 EVEX_V128, EVEX_CD8<32, CD8VF>;
5228 defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f32x_info>,
5229 EVEX_V256, EVEX_CD8<32, CD8VF>;
5230 defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v2f64x_info>,
5231 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
5232 defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v4f64x_info>,
5233 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed forms use opcode 0x2C with X86scalef; scalar forms use 0x2D with
// X86scalefs.
5236 defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;
5238 //===----------------------------------------------------------------------===//
5239 // AVX-512 VPTESTM instructions
5240 //===----------------------------------------------------------------------===//
// rr and rm forms of a vector test whose result is written to a mask
// register (_.KRC). The rr form is marked isCommutable; the rm form loads
// $src2 through _.LdFrag and bitconverts it to _.VT.
5242 multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
5243 X86VectorVTInfo _> {
5244 let isCommutable = 1 in
5245 defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
5246 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5247 "$src2, $src1", "$src1, $src2",
5248 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
5250 defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5251 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5252 "$src2, $src1", "$src1, $src2",
5253 (OpNode (_.VT _.RC:$src1),
5254 (_.VT (bitconvert (_.LdFrag addr:$src2))))>,
5256 EVEX_CD8<_.EltSize, CD8VF>;
// Broadcast-memory ("rmb") form of a vector test: $src2 is a scalar memory
// operand loaded with _.ScalarLdFrag and wrapped in X86VBroadcast.
5259 multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5260 X86VectorVTInfo _> {
5261 defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
5262 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5263 "${src2}"##_.BroadcastStr##", $src1",
5264 "$src1, ${src2}"##_.BroadcastStr,
5265 (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
5266 (_.ScalarLdFrag addr:$src2))))>,
5267 EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
5270 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Both narrow operands (type `_`) are inserted into undefined registers of
// the wider ExtendInfo type at _.SubRegIdx, and the instruction named
// NAME # Suffix # "Zrr" is applied. The result is wrapped in COPY_TO_REGCLASS
// and typed as _.KVT.
5271 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
5272 X86VectorVTInfo _, string Suffix> {
5273 def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
5274 (_.KVT (COPY_TO_REGCLASS
5275 (!cast<Instruction>(NAME # Suffix # "Zrr")
5276 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5277 _.RC:$src1, _.SubRegIdx),
5278 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
5279 _.RC:$src2, _.SubRegIdx)),
// Dword/qword vector tests at every width, each with rr/rm and broadcast
// forms. With VLX the native 256/128-bit instructions are defined; without
// VLX (NoVLX) the *_Alt lowerings select the 512-bit instruction instead.
5283 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5284 AVX512VLVectorVTInfo _, string Suffix> {
5285 let Predicates = [HasAVX512] in
5286 defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
5287 avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
5289 let Predicates = [HasAVX512, HasVLX] in {
5290 defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
5291 avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
5292 defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
5293 avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
5295 let Predicates = [HasAVX512, NoVLX] in {
5296 defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
5297 defm Z128_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info128, Suffix>;
// Dword ("d") and qword ("q", VEX_W) flavours of a vector test. The same
// opcode is used for both.
5301 multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
5302 defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
5303 avx512vl_i32_info, "D">;
5304 defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
5305 avx512vl_i64_info, "Q">, VEX_W;
// Word ("w") and byte ("b") flavours of a vector test. Only avx512_vptest is
// used, so no broadcast form is defined for these element sizes. The 512-bit
// defms require HasBWI; the 256/128-bit defms require HasVLX + HasBWI.
5308 multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
5310 let Predicates = [HasBWI] in {
5311 defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
5313 defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
5316 let Predicates = [HasVLX, HasBWI] in {
5318 defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
5320 defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
5322 defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
5324 defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
// NOTE(review): these lowerings select the 512-bit byte/word instructions
// (BZ / WZ), which are defined under HasBWI above, yet this guard only
// requires HasAVX512 - confirm whether [HasBWI, NoVLX] is intended.
5328 let Predicates = [HasAVX512, NoVLX] in {
5329 defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">;
5330 defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">;
5331 defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
5332 defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
// Combines the byte/word forms (opcode opc_wb) with the dword/qword forms
// (opcode opc_dq) of a vector test.
5337 multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
5339 avx512_vptest_wb <opc_wb, OpcodeStr, OpNode>,
5340 avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
// VPTESTM and VPTESTNM share opcodes 0x26 (byte/word) and 0x27 (dword/qword);
// they differ in the prefix class applied (T8PD vs. T8XS).
5342 defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD;
5343 defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS;
5346 //===----------------------------------------------------------------------===//
5347 // AVX-512 Shift instructions
5348 //===----------------------------------------------------------------------===//
// Shift by an 8-bit immediate ($src2, u8imm). "ri" shifts a register source
// and uses format ImmFormR; "mi" shifts a memory source (loaded via _.LdFrag
// and bitconverted to _.VT) and uses format ImmFormM.
5349 multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
5350 string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
5351 let ExeDomain = _.ExeDomain in {
5352 defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
5353 (ins _.RC:$src1, u8imm:$src2), OpcodeStr,
5354 "$src2, $src1", "$src1, $src2",
5355 (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))),
5356 SSE_INTSHIFT_ITINS_P.rr>;
5357 defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5358 (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
5359 "$src2, $src1", "$src1, $src2",
5360 (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
5362 SSE_INTSHIFT_ITINS_P.rm>;
// Shift-by-immediate with a broadcast memory source ("mbi"): $src1 is a
// scalar memory operand that is loaded and broadcast before being shifted by
// the u8imm $src2. Encoded with EVEX_B.
5366 multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
5367 string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
5368 let ExeDomain = _.ExeDomain in
5369 defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
5370 (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
5371 "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
5372 (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2))),
5373 SSE_INTSHIFT_ITINS_P.rm>, EVEX_B;
// Maskable two-source forms in which $src2 is a 128-bit operand independent of
// the width of the vector type _:
//   rr - $src2 is a VR128X register typed as SrcVT
//   rm - $src2 is loaded from i128mem as v2i64 and converted through bc_frag
5376 multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
5377 ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
5378 // src2 is always 128-bit
5379 let ExeDomain = _.ExeDomain in {
5380 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5381 (ins _.RC:$src1, VR128X:$src2), OpcodeStr,
5382 "$src2, $src1", "$src1, $src2",
5383 (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2))),
5384 SSE_INTSHIFT_ITINS_P.rr>, AVX512BIBase, EVEX_4V;
5385 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5386 (ins _.RC:$src1, i128mem:$src2), OpcodeStr,
5387 "$src2, $src1", "$src1, $src2",
5388 (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
5389 SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
// Instantiates avx512_shift_rrm for the 512-, 256- and 128-bit members of
// VTInfo. The 512-bit form (Z) is guarded by prd alone; Z256 and Z128 also
// require HasVLX.
// The CD8 tuple type differs per width (CD8VQ / CD8VH / CD8VF).
// NOTE(review): presumably this keeps the displacement scale matched to the
// fixed 128-bit memory operand - confirm against the EVEX_CD8 definition.
5394 multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5395 ValueType SrcVT, PatFrag bc_frag,
5396 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
5397 let Predicates = [prd] in
5398 defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
5399 VTInfo.info512>, EVEX_V512,
5400 EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ;
5401 let Predicates = [prd, HasVLX] in {
5402 defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
5403 VTInfo.info256>, EVEX_V256,
5404 EVEX_CD8<VTInfo.info256.EltSize, CD8VH>;
5405 defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag,
5406 VTInfo.info128>, EVEX_V128,
5407 EVEX_CD8<VTInfo.info128.EltSize, CD8VF>;
// Element-type fan-out over avx512_shift_sizes:
//   D - i32 elements, opcode opcd, guarded by HasAVX512
//   Q - i64 elements, opcode opcq, guarded by HasAVX512, plus VEX_W
//   W - i16 elements, opcode opcw, guarded by HasBWI
// The mnemonic is OpcodeStr with "d", "q" or "w" appended.
5411 multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
5412 string OpcodeStr, SDNode OpNode> {
5413 defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
5414 avx512vl_i32_info, HasAVX512>;
5415 defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
5416 avx512vl_i64_info, HasAVX512>, VEX_W;
5417 defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, v8i16, bc_v8i16,
5418 avx512vl_i16_info, HasBWI>;
// For each vector width, instantiates the immediate forms (avx512_shift_rmi)
// together with the broadcast-memory form (avx512_shift_rmbi).
// Z is guarded by HasAVX512; Z256 and Z128 additionally require HasVLX.
5421 multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5422 string OpcodeStr, SDNode OpNode,
5423 AVX512VLVectorVTInfo VTInfo> {
5424 let Predicates = [HasAVX512] in
5425 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5427 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5428 VTInfo.info512>, EVEX_V512;
5429 let Predicates = [HasAVX512, HasVLX] in {
5430 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5432 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5433 VTInfo.info256>, EVEX_V256;
5434 defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5436 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5437 VTInfo.info128>, EVEX_V128;
// Word-element (v32i16 / v16i16 / v8i16) immediate forms. Only avx512_shift_rmi
// is instantiated here; no broadcast-memory form is added.
// WZ is guarded by HasBWI; WZ256 and WZ128 require HasVLX and HasBWI.
5441 multiclass avx512_shift_rmi_w<bits<8> opcw,
5442 Format ImmFormR, Format ImmFormM,
5443 string OpcodeStr, SDNode OpNode> {
5444 let Predicates = [HasBWI] in
5445 defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5446 v32i16_info>, EVEX_V512;
5447 let Predicates = [HasVLX, HasBWI] in {
5448 defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5449 v16i16x_info>, EVEX_V256;
5450 defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5451 v8i16x_info>, EVEX_V128;
// Dword (opcd, mnemonic suffix "d") and qword (opcq, suffix "q") immediate
// forms built on avx512_shift_rmi_sizes. The Q variant is additionally VEX_W.
5455 multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
5456 Format ImmFormR, Format ImmFormM,
5457 string OpcodeStr, SDNode OpNode> {
5458 defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode,
5459 avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
5460 defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
5461 avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
// Shifts and rotates by an 8-bit immediate. The word forms use opcode 0x71 and
// the dword/qword forms use 0x72/0x73; the individual operations differ in the
// MRMnr/MRMnm format they are given. VPSRA, VPROR and VPROL pass 0x72 for both
// the dword and the qword opcode.
5464 defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli>,
5465 avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli>, AVX512BIi8Base, EVEX_4V;
5467 defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
5468 avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli>, AVX512BIi8Base, EVEX_4V;
5470 defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
5471 avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
5473 defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
5474 defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
// Shifts whose second source is a 128-bit register or memory operand
// (avx512_shift_types). Template arguments are the D, Q and W opcodes.
5476 defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
5477 defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
5478 defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
5480 // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX.
// Each pattern widens $src1 into a v8i64 value by inserting it into an
// IMPLICIT_DEF at sub_ymm/sub_xmm, applies the 512-bit instruction, and then
// extracts the subregister of the original width from the result.
5481 let Predicates = [HasAVX512, NoVLX] in {
// X86vsra with a v2i64 second operand, for v4i64 and v2i64 results.
5482 def : Pat<(v4i64 (X86vsra (v4i64 VR256X:$src1), (v2i64 VR128X:$src2))),
5483 (EXTRACT_SUBREG (v8i64
5485 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5486 VR128X:$src2)), sub_ymm)>;
5488 def : Pat<(v2i64 (X86vsra (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5489 (EXTRACT_SUBREG (v8i64
5491 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5492 VR128X:$src2)), sub_xmm)>;
// X86vsrai with an i8 immediate, for v4i64 and v2i64 results.
5494 def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
5495 (EXTRACT_SUBREG (v8i64
5497 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5498 imm:$src2)), sub_ymm)>;
5500 def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
5501 (EXTRACT_SUBREG (v8i64
5503 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5504 imm:$src2)), sub_xmm)>;
5507 //===-------------------------------------------------------------------===//
5508 // Variable Bit Shifts
5509 //===-------------------------------------------------------------------===//
// Maskable two-source forms in which both sources have the full vector type:
//   rr - $src2 in a register
//   rm - $src2 loaded from memory through _.LdFrag; this form also carries
//        EVEX_CD8<_.EltSize, CD8VF>
// Besides the variable shifts, this multiclass is reused in this file for
// other two-source nodes (e.g. X86VPermv and X86pshufb).
5510 multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
5511 X86VectorVTInfo _> {
5512 let ExeDomain = _.ExeDomain in {
5513 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
5514 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
5515 "$src2, $src1", "$src1, $src2",
5516 (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2))),
5517 SSE_INTSHIFT_ITINS_P.rr>, AVX5128IBase, EVEX_4V;
5518 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5519 (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
5520 "$src2, $src1", "$src1, $src2",
5521 (_.VT (OpNode _.RC:$src1,
5522 (_.VT (bitconvert (_.LdFrag addr:$src2))))),
5523 SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
5524 EVEX_CD8<_.EltSize, CD8VF>;
// Maskable broadcast-memory form (rmb) of a two-source operation: $src2 is a
// scalar memory operand whose loaded value is broadcast with X86VBroadcast to
// the vector type. The instruction is tagged EVEX_B and the assembly operand
// is printed with _.BroadcastStr appended.
5528 multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
5529 X86VectorVTInfo _> {
5530 let ExeDomain = _.ExeDomain in
5531 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
5532 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5533 "${src2}"##_.BroadcastStr##", $src1",
5534 "$src1, ${src2}"##_.BroadcastStr,
5535 (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast
5536 (_.ScalarLdFrag addr:$src2))))),
5537 SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_B,
5538 EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
// For each vector width, instantiates the rr/rm forms (avx512_var_shift) and
// the broadcast form (avx512_var_shift_mb).
// Z is guarded by HasAVX512; Z256 and Z128 additionally require HasVLX.
5541 multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5542 AVX512VLVectorVTInfo _> {
5543 let Predicates = [HasAVX512] in
5544 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
5545 avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
5547 let Predicates = [HasAVX512, HasVLX] in {
5548 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
5549 avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
5550 defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
5551 avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
// D and Q element-size instantiations of avx512_var_shift_sizes. Both use the
// same opcode; the mnemonic gets a "d" or "q" suffix and the Q variant is
// additionally VEX_W.
5555 multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
5557 defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode,
5559 defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode,
5560 avx512vl_i64_info>, VEX_W;
5563 // Use 512bit version to implement 128/256 bit in case NoVLX.
// Parameters:
//   _         - vector type family; info256 and info128 are the types being
//               matched, info512 is the type the operands are widened to.
//   OpcodeStr - instruction name prefix; the patterns select OpcodeStr#"Zrr".
//   OpNode    - node being matched.
//   p         - predicates guarding the patterns.
// Both sources are widened by inserting them into an IMPLICIT_DEF of the
// 512-bit type (at sub_ymm for 256-bit, sub_xmm for 128-bit).
5564 multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr,
5565 SDNode OpNode, list<Predicate> p> {
5566 let Predicates = p in {
5567 def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1),
5568 (_.info256.VT _.info256.RC:$src2))),
5570 (!cast<Instruction>(OpcodeStr#"Zrr")
5571 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
5572 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
5575 def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1),
5576 (_.info128.VT _.info128.RC:$src2))),
5578 (!cast<Instruction>(OpcodeStr#"Zrr")
5579 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
5580 (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
// Word-element instantiations of avx512_var_shift (v32i16 / v16i16 / v8i16).
// WZ is guarded by HasBWI; WZ256 and WZ128 require HasVLX and HasBWI.
5584 multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
5586 let Predicates = [HasBWI] in
5587 defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, v32i16_info>,
5589 let Predicates = [HasVLX, HasBWI] in {
5591 defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, v16i16x_info>,
5593 defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, v8i16x_info>,
// Per-element variable shifts, selected from the generic shl/sra/srl nodes.
// The first opcode is shared by the D and Q forms; the second is the W form.
5598 defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
5599 avx512_var_shift_w<0x12, "vpsllvw", shl>;
5601 defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
5602 avx512_var_shift_w<0x11, "vpsravw", sra>;
5604 defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
5605 avx512_var_shift_w<0x10, "vpsrlvw", srl>;
// Variable rotates, selected from rotr/rotl; D and Q forms only.
5607 defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
5608 defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
// Without VLX, select the 128/256-bit forms of these operations through the
// 512-bit register-register instruction.
5610 defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>;
5611 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>;
5612 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRAVW", sra, [HasBWI, NoVLX]>;
5613 defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
5615 // Special handling for VPSRAV intrinsics.
// Maps X86vsrav nodes of type _.VT onto the instruction family named
// InstrStr#_.ZSuffix, under predicates p. Patterns come in register/memory
// pairs for the unmasked, rrk/rmk and rrkz/rmkz instruction forms.
5616 multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
5617 list<Predicate> p> {
5618 let Predicates = p in {
// Unmasked: register source, then full-vector load source.
5619 def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
5620 (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
5622 def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
5623 (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
5624 _.RC:$src1, addr:$src2)>;
// vselect on $mask with pass-through $src0 selects the rrk / rmk forms.
5625 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5626 (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
5627 (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
5628 _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
5629 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5630 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5632 (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
5633 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
// vselect on $mask against _.ImmAllZerosV selects the rrkz form; the memory
// variant below selects rmkz.
5634 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5635 (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
5636 (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
5637 _.RC:$src1, _.RC:$src2)>;
5638 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5639 (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
5641 (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
5642 _.RC:$src1, addr:$src2)>;
// Extends avx512_var_shift_int_lowering (inherited below) with patterns whose
// second X86vsrav operand is a broadcast scalar load. These select the rmb
// (unmasked), rmbk and rmbkz instruction forms.
5646 multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
5647 list<Predicate> p> :
5648 avx512_var_shift_int_lowering<InstrStr, _, p> {
5649 let Predicates = p in {
5650 def : Pat<(_.VT (X86vsrav _.RC:$src1,
5651 (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
5652 (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
5653 _.RC:$src1, addr:$src2)>;
5654 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5655 (X86vsrav _.RC:$src1,
5656 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5658 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
5659 _.KRC:$mask, _.RC:$src1, addr:$src2)>;
5660 def : Pat<(_.VT (vselect _.KRCWM:$mask,
5661 (X86vsrav _.RC:$src1,
5662 (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
5664 (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
5665 _.RC:$src1, addr:$src2)>;
// X86vsrav lowering per type. The word types get only the register and
// full-vector load patterns; the dword and qword types use the _mb variant and
// so also get the broadcast-load patterns.
5669 defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
5670 defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
5671 defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
5672 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
5673 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
5674 defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
5675 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
5676 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
5677 defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
5680 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens its vector operand(s) into a 512-bit value (v8i64 for
// the qword types, v16i32 for the dword types) via INSERT_SUBREG on
// IMPLICIT_DEF, and extracts the original-width subregister from the result.
5681 let Predicates = [HasAVX512, NoVLX] in {
// Generic rotl with a vector count: both operands are widened.
5682 def : Pat<(v2i64 (rotl (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5683 (EXTRACT_SUBREG (v8i64
5685 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5686 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
5688 def : Pat<(v4i64 (rotl (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5689 (EXTRACT_SUBREG (v8i64
5691 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5692 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
5695 def : Pat<(v4i32 (rotl (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5696 (EXTRACT_SUBREG (v16i32
5698 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5699 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
5701 def : Pat<(v8i32 (rotl (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5702 (EXTRACT_SUBREG (v16i32
5704 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5705 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
// X86vrotli with an i8 immediate: only $src1 is widened, the immediate is
// passed through unchanged.
5708 def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
5709 (EXTRACT_SUBREG (v8i64
5711 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5712 imm:$src2)), sub_xmm)>;
5713 def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
5714 (EXTRACT_SUBREG (v8i64
5716 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5717 imm:$src2)), sub_ymm)>;
5719 def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
5720 (EXTRACT_SUBREG (v16i32
5722 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5723 imm:$src2)), sub_xmm)>;
5724 def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
5725 (EXTRACT_SUBREG (v16i32
5727 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5728 imm:$src2)), sub_ymm)>;
5731 // Use 512bit VPROR/VPRORI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
// Each pattern widens its vector operand(s) into a 512-bit value (v8i64 for
// the qword types, v16i32 for the dword types) via INSERT_SUBREG on
// IMPLICIT_DEF, and extracts the original-width subregister from the result.
5732 let Predicates = [HasAVX512, NoVLX] in {
// Generic rotr with a vector count: both operands are widened.
5733 def : Pat<(v2i64 (rotr (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
5734 (EXTRACT_SUBREG (v8i64
5736 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5737 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
5739 def : Pat<(v4i64 (rotr (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
5740 (EXTRACT_SUBREG (v8i64
5742 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5743 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
5746 def : Pat<(v4i32 (rotr (v4i32 VR128X:$src1), (v4i32 VR128X:$src2))),
5747 (EXTRACT_SUBREG (v16i32
5749 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5750 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src2, sub_xmm))),
5752 def : Pat<(v8i32 (rotr (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
5753 (EXTRACT_SUBREG (v16i32
5755 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5756 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))),
// X86vrotri with an i8 immediate: only $src1 is widened, the immediate is
// passed through unchanged.
5759 def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
5760 (EXTRACT_SUBREG (v8i64
5762 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5763 imm:$src2)), sub_xmm)>;
5764 def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
5765 (EXTRACT_SUBREG (v8i64
5767 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5768 imm:$src2)), sub_ymm)>;
5770 def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
5771 (EXTRACT_SUBREG (v16i32
5773 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
5774 imm:$src2)), sub_xmm)>;
5775 def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
5776 (EXTRACT_SUBREG (v16i32
5778 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
5779 imm:$src2)), sub_ymm)>;
5782 //===-------------------------------------------------------------------===//
5783 // 1-src variable permutation VPERMW/D/Q
5784 //===-------------------------------------------------------------------===//
// Two-source permute forms built from the avx512_var_shift and
// avx512_var_shift_mb multiclasses. Only the 512-bit (Z, HasAVX512) and
// 256-bit (Z256, HasAVX512 + HasVLX) widths are defined here; there is no
// 128-bit instantiation in this multiclass.
5785 multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
5786 AVX512VLVectorVTInfo _> {
5787 let Predicates = [HasAVX512] in
5788 defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
5789 avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
5791 let Predicates = [HasAVX512, HasVLX] in
5792 defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
5793 avx512_var_shift_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
// Immediate-controlled permute forms built from avx512_shift_rmi and
// avx512_shift_rmbi. Only the 512-bit (Z, HasAVX512) and 256-bit (Z256,
// HasAVX512 + HasVLX) widths are defined here; there is no 128-bit
// instantiation in this multiclass.
5796 multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM,
5797 string OpcodeStr, SDNode OpNode,
5798 AVX512VLVectorVTInfo VTInfo> {
5799 let Predicates = [HasAVX512] in
5800 defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5802 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5803 VTInfo.info512>, EVEX_V512;
5804 let Predicates = [HasAVX512, HasVLX] in
5805 defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode,
5807 avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode,
5808 VTInfo.info256>, EVEX_V256;
// Byte/word permute forms built from avx512_var_shift for all three vector
// widths. The feature predicate is passed in as prd: Z requires prd alone,
// Z256 and Z128 require HasVLX as well.
5811 multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
5812 Predicate prd, SDNode OpNode,
5813 AVX512VLVectorVTInfo _> {
5814 let Predicates = [prd] in
5815 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, _.info512>,
5817 let Predicates = [HasVLX, prd] in {
5818 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, _.info256>,
5820 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, _.info128>,
// Permutes selected from X86VPermv. VPERMW and VPERMB share opcode 0x8D and
// are gated on HasBWI and HasVBMI respectively; VPERMW is additionally VEX_W.
5825 defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
5826 avx512vl_i16_info>, VEX_W;
5827 defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
// Dword/qword and single/double permutes. Each D/Q and PS/PD pair shares an
// opcode (0x36 and 0x16); the 64-bit element variants add VEX_W.
5830 defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
5832 defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
5833 avx512vl_i64_info>, VEX_W;
5834 defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
5836 defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
5837 avx512vl_f64_info>, VEX_W;
// Immediate-controlled 64-bit element permutes, selected from X86VPermi.
5839 defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
5840 X86VPermi, avx512vl_i64_info>,
5841 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5842 defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
5843 X86VPermi, avx512vl_f64_info>,
5844 EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
5845 //===----------------------------------------------------------------------===//
5846 // AVX-512 - VPERMIL
5847 //===----------------------------------------------------------------------===//
// Maskable forms of a permute whose control operand ($src2) has its own type
// description:
//   _    - type info of the data operand and of the result
//   Ctrl - type info of the control operand
// Forms: rr (control in a register), rm (control loaded through Ctrl.LdFrag)
// and rmb (control is a broadcast scalar load, tagged EVEX_B).
5849 multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode,
5850 X86VectorVTInfo _, X86VectorVTInfo Ctrl> {
5851 defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst),
5852 (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr,
5853 "$src2, $src1", "$src1, $src2",
5854 (_.VT (OpNode _.RC:$src1,
5855 (Ctrl.VT Ctrl.RC:$src2)))>,
5857 defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5858 (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr,
5859 "$src2, $src1", "$src1, $src2",
5862 (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>,
5863 T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
5864 defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst),
5865 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
5866 "${src2}"##_.BroadcastStr##", $src1",
5867 "$src1, ${src2}"##_.BroadcastStr,
5870 (Ctrl.VT (X86VBroadcast
5871 (Ctrl.ScalarLdFrag addr:$src2)))))>,
5872 T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
// Instantiates avx512_permil_vec with X86VPermilpv for all three widths,
// pairing each data type in _ with the control type of the same width in Ctrl.
// Z is guarded by HasAVX512; Z128 and Z256 additionally require HasVLX.
5875 multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar,
5876 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5877 let Predicates = [HasAVX512] in {
5878 defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512,
5879 Ctrl.info512>, EVEX_V512;
5881 let Predicates = [HasAVX512, HasVLX] in {
5882 defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128,
5883 Ctrl.info128>, EVEX_V128;
5884 defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256,
5885 Ctrl.info256>, EVEX_V256;
// Combines two families under the same NAME: the vector-controlled forms
// (avx512_permil_vec_common, opcode OpcVar) and the immediate-controlled forms
// (avx512_shift_rmi_sizes, opcode OpcImm).
5889 multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
5890 AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{
5892 defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>;
5893 defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr,
5895 EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
// VPERMILPS / VPERMILPD: immediate opcodes 0x04 / 0x05 and variable opcodes
// 0x0C / 0x0D. Each is defined under its own execution domain; the PD variant
// is additionally VEX_W.
5898 let ExeDomain = SSEPackedSingle in
5899 defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
5901 let ExeDomain = SSEPackedDouble in
5902 defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
5903 avx512vl_i64_info>, VEX_W;
5904 //===----------------------------------------------------------------------===//
5905 // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW
5906 //===----------------------------------------------------------------------===//
// All three shuffles use opcode 0x70 with MRMSrcReg/MRMSrcMem formats and are
// told apart by their base class (AVX512BIi8Base, AVX512XSIi8Base,
// AVX512XDIi8Base). VPSHUFD is built through avx512_shift_rmi_sizes; the two
// word shuffles are built through avx512_shift_rmi_w.
5908 defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd",
5909 X86PShufd, avx512vl_i32_info>,
5910 EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>;
5911 defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw",
5912 X86PShufhw>, EVEX, AVX512XSIi8Base;
5913 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
5914 X86PShuflw>, EVEX, AVX512XDIi8Base;
// Byte-vector (v64i8 / v32i8 / v16i8) instantiations of avx512_var_shift.
// Z is guarded by HasBWI; Z256 and Z128 require HasVLX and HasBWI.
5916 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
5917 let Predicates = [HasBWI] in
5918 defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
5920 let Predicates = [HasVLX, HasBWI] in {
5921 defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, v32i8x_info>, EVEX_V256;
5922 defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, v16i8x_info>, EVEX_V128;
// VPSHUFB: opcode 0x00, selected from X86pshufb.
5926 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;
5928 //===----------------------------------------------------------------------===//
5929 // Move Low to High and High to Low packed FP Instructions
5930 //===----------------------------------------------------------------------===//
// EVEX register-register forms of vmovlhps (opcode 0x16) and vmovhlps
// (opcode 0x12). Both operate on VR128X registers and are matched here for
// v4f32 X86Movlhps / X86Movhlps nodes.
5931 def VMOVLHPSZrr : AVX512PSI<0x16, MRMSrcReg, (outs VR128X:$dst),
5932 (ins VR128X:$src1, VR128X:$src2),
5933 "vmovlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5934 [(set VR128X:$dst, (v4f32 (X86Movlhps VR128X:$src1, VR128X:$src2)))],
5935 IIC_SSE_MOV_LH>, EVEX_4V;
5936 def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
5937 (ins VR128X:$src1, VR128X:$src2),
5938 "vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
5939 [(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))],
5940 IIC_SSE_MOV_LH>, EVEX_4V;
// Select the same register-register instructions for integer-typed nodes:
// v4i32 and v2i64 X86Movlhps, and v4i32 X86Movhlps.
5942 let Predicates = [HasAVX512] in {
5944 def : Pat<(v4i32 (X86Movlhps VR128X:$src1, VR128X:$src2)),
5945 (VMOVLHPSZrr VR128X:$src1, VR128X:$src2)>;
5946 def : Pat<(v2i64 (X86Movlhps VR128X:$src1, VR128X:$src2)),
5947 (VMOVLHPSZrr (v2i64 VR128X:$src1), VR128X:$src2)>;
5950 def : Pat<(v4i32 (X86Movhlps VR128X:$src1, VR128X:$src2)),
5951 (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
5954 //===----------------------------------------------------------------------===//
5955 // VMOVHPS/PD VMOVLPS Instructions
5956 // All patterns were taken from the SSE implementation.
5957 //===----------------------------------------------------------------------===//
// Load form (rm) of a half-register move: takes a register $src1 and a 64-bit
// memory operand $src2 (f64mem) and produces a full register of class _.RC.
// The visible part of the pattern loads $src2 as an f64 and wraps it with
// scalar_to_vector as a v2f64.
5958 multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
5959 X86VectorVTInfo _> {
5960 let ExeDomain = _.ExeDomain in
5961 def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
5962 (ins _.RC:$src1, f64mem:$src2),
5963 !strconcat(OpcodeStr,
5964 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
5968 (v2f64 (scalar_to_vector (loadf64 addr:$src2)))))))],
5969 IIC_SSE_MOV_LH>, EVEX_4V;
// 128-bit load forms. vmovhps/vmovhpd use opcode 0x16 and vmovlps/vmovlpd use
// 0x12. The PS variants are EVEX_CD8<32, CD8VT2>; the PD variants are
// EVEX_CD8<64, CD8VT1> and VEX_W.
5972 defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
5973 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5974 defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd,
5975 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
5976 defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
5977 v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
5978 defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
5979 v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
// Additional load-folding patterns for the 128-bit rm instructions.
5981 let Predicates = [HasAVX512] in {
// VMOVHPSZ128rm: X86Movlhps whose second operand is a 64-bit integer load
// (scalar_to_vector of loadi64, or X86vzload) viewed through a bitcast.
5983 def : Pat<(X86Movlhps VR128X:$src1,
5984 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
5985 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
5986 def : Pat<(X86Movlhps VR128X:$src1,
5987 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
5988 (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVHPDZ128rm: v2f64 X86Unpckl with a scalar 64-bit load as second operand.
5990 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5991 (scalar_to_vector (loadf64 addr:$src2)))),
5992 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
5993 def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
5994 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
5995 (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPSZ128rm: X86Movlps with a load as second operand (v4f32 and v4i32).
5997 def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
5998 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
5999 def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))),
6000 (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPDZ128rm: X86Movlpd with a load (v2f64 and v2i64), and X86Movsd with a
// scalar f64 load.
6002 def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
6003 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6004 def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
6005 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
6006 def : Pat<(v2f64 (X86Movsd VR128X:$src1,
6007 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
6008 (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
// Store forms (MRMDestMem): each stores one f64 element taken from the VR128X
// source $src to the f64mem operand $dst.
// vmovhps / vmovhpd (opcode 0x17) match a store of element 0 of
// X86Unpckh($src, $src).
6011 def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs),
6012 (ins f64mem:$dst, VR128X:$src),
6013 "vmovhps\t{$src, $dst|$dst, $src}",
6014 [(store (f64 (extractelt
6015 (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)),
6016 (bc_v2f64 (v4f32 VR128X:$src))),
6017 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6018 EVEX, EVEX_CD8<32, CD8VT2>;
6019 def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
6020 (ins f64mem:$dst, VR128X:$src),
6021 "vmovhpd\t{$src, $dst|$dst, $src}",
6022 [(store (f64 (extractelt
6023 (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
6024 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>,
6025 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// vmovlps / vmovlpd (opcode 0x13) match a store of element 0 of $src itself,
// viewed as v2f64.
6026 def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
6027 (ins f64mem:$dst, VR128X:$src),
6028 "vmovlps\t{$src, $dst|$dst, $src}",
6029 [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128X:$src)),
6030 (iPTR 0))), addr:$dst)],
6032 EVEX, EVEX_CD8<32, CD8VT2>;
6033 def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
6034 (ins f64mem:$dst, VR128X:$src),
6035 "vmovlpd\t{$src, $dst|$dst, $src}",
6036 [(store (f64 (extractelt (v2f64 VR128X:$src),
6037 (iPTR 0))), addr:$dst)],
6039 EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
// Additional store-folding patterns for the 128-bit mr instructions.
6041 let Predicates = [HasAVX512] in {
// VMOVHPDZ128mr: store of element 0 of X86VPermilpi($src, 1).
6043 def : Pat<(store (f64 (extractelt
6044 (v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
6045 (iPTR 0))), addr:$dst),
6046 (VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
// VMOVLPSZ128mr: store of an X86Movlps whose first operand is a load from
// $src1, selected as a store of $src2 to $src1.
6048 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
6050 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
6051 def : Pat<(store (v4i32 (X86Movlps
6052 (bc_v4i32 (loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1),
6053 (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
// VMOVLPDZ128mr: the same shape for X86Movlpd (v2f64 and v2i64).
6055 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6057 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
6058 def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
6060 (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
6062 //===----------------------------------------------------------------------===//
6063 // FMA - Fused Multiply Operations
// Packed FMA "213" forms. $src1 is tied to $dst and the selection pattern
// passes the operands to OpNode in the order ($src2, $src1, $src3).
//   r  - $src3 in a register
//   m  - $src3 loaded through _.LdFrag
//   mb - $src3 is a broadcast scalar load (EVEX_B)
// Suff is the name suffix used to look up the instruction in the extra pattern
// (NAME#Suff#_.ZSuffix#...).
6066 multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6067 X86VectorVTInfo _, string Suff> {
6068 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
6069 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6070 (ins _.RC:$src2, _.RC:$src3),
6071 OpcodeStr, "$src3, $src2", "$src2, $src3",
6072 (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
6075 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6076 (ins _.RC:$src2, _.MemOp:$src3),
6077 OpcodeStr, "$src3, $src2", "$src2, $src3",
6078 (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
6081 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6082 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6083 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6084 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6086 _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
6087 AVX512FMA3Base, EVEX_B;
6090 // Additional pattern for folding broadcast nodes in other orders.
// Here the masked node has $src1 and $src2 in the opposite order from the mb
// pattern above; it is selected as the mbk form.
6091 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6092 (OpNode _.RC:$src1, _.RC:$src2,
6093 (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
6095 (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
6096 _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
// Register form (rb) of the "213" FMA with an explicit rounding-control
// operand: $rc is an AVX512RC operand, passed to OpNode as (i32 imm:$rc) after
// the three sources. The instruction is tagged EVEX_B and EVEX_RC.
// Suff is not referenced in the visible body.
6099 multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6100 X86VectorVTInfo _, string Suff> {
6101 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
6102 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6103 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6104 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6105 (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
6106 AVX512FMA3Base, EVEX_B, EVEX_RC;
// Instantiates the "213" FMA forms for each vector width. Only the 512-bit
// variant (Z) also gets the rounding-control form (avx512_fma3_213_round with
// OpNodeRnd); Z256 and Z128 get the r/m/mb forms only.
// Z is guarded by HasAVX512; Z256 and Z128 additionally require HasVLX.
6109 multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6110 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6112 let Predicates = [HasAVX512] in {
6113 defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6114 avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6115 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6117 let Predicates = [HasVLX, HasAVX512] in {
6118 defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6119 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6120 defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6121 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Single- and double-precision fan-out: PS uses avx512vl_f32_info with name
// suffix "PS"; PD uses avx512vl_f64_info with suffix "PD" and is also VEX_W.
6125 multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6126 SDNode OpNodeRnd > {
6127 defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6128 avx512vl_f32_info, "PS">;
6129 defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6130 avx512vl_f64_info, "PD">, VEX_W;
// "213" packed FMA instructions. Each defm supplies the opcode, the mnemonic
// stem, the plain selection node and its rounding-mode counterpart.
6133 defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
6134 defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
6135 defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
6136 defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
6137 defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddRnd>;
6138 defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
// Packed FMA "231" forms. $src1 is tied to $dst and the selection pattern
// passes the operands to OpNode in the order ($src2, $src3, $src1).
//   r  - $src3 in a register
//   m  - $src3 loaded through _.LdFrag
//   mb - $src3 is a broadcast scalar load (EVEX_B)
// Suff is the name suffix used to look up instructions in the extra patterns
// (NAME#Suff#_.ZSuffix#...).
6141 multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6142 X86VectorVTInfo _, string Suff> {
6143 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
6144 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6145 (ins _.RC:$src2, _.RC:$src3),
6146 OpcodeStr, "$src3, $src2", "$src2, $src3",
6147 (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
6150 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6151 (ins _.RC:$src2, _.MemOp:$src3),
6152 OpcodeStr, "$src3, $src2", "$src2, $src3",
6153 (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
6156 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6157 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6158 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6159 "$src2, ${src3}"##_.BroadcastStr,
6160 (_.VT (OpNode _.RC:$src2,
6161 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6162 _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B;
6165 // Additional patterns for folding broadcast nodes in other orders.
// These match the broadcast as the first OpNode operand (with $src2 second)
// and select the mb, mbk and mbkz forms respectively.
6166 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
6167 _.RC:$src2, _.RC:$src1)),
6168 (!cast<Instruction>(NAME#Suff#_.ZSuffix#mb) _.RC:$src1,
6169 _.RC:$src2, addr:$src3)>;
6170 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6171 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
6172 _.RC:$src2, _.RC:$src1),
6174 (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
6175 _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
6176 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6177 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
6178 _.RC:$src2, _.RC:$src1),
6180 (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1,
6181 _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
// 231-form packed FMA with an explicit rounding-control operand: a single
// register-only defm (rb) that takes AVX512RC:$rc. The pattern passes
// (src2, src3, src1) to OpNode followed by $rc as an i32 immediate. $src1 is
// tied to $dst.
6184 multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6185 X86VectorVTInfo _, string Suff> {
6186 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
6187 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6188 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6189 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6190 (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1>,
6191 AVX512FMA3Base, EVEX_B, EVEX_RC;
// Instantiates the 231-form packed FMA for the three vector widths described
// by the AVX512VLVectorVTInfo `_`. Only the 512-bit defm (Z, HasAVX512) mixes
// in avx512_fma3_231_round; Z256 and Z128 require HasVLX as well and use only
// avx512_fma3p_231_rm.
6194 multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6195 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6197 let Predicates = [HasAVX512] in {
6198 defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6199 avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6200 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6202 let Predicates = [HasVLX, HasAVX512] in {
6203 defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6204 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6205 defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6206 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Expands one 231-form FMA opcode into a PS family (avx512vl_f32_info) and a
// PD family (avx512vl_f64_info) by appending "ps"/"pd" to OpcodeStr. The PD
// family is additionally tagged with VEX_W.
6210 multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6211 SDNode OpNodeRnd > {
6212 defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6213 avx512vl_f32_info, "PS">;
6214 defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6215 avx512vl_f64_info, "PD">, VEX_W;
// Packed 231-form FMA families. The template arguments are, in order: opcode
// byte, mnemonic stem, the plain SDNode, and the SDNode used for the
// rounding-control form.
6218 defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
6219 defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
6220 defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
6221 defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
6222 defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
6223 defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
// 132-form packed FMA for a single vector type `_`: register (r), full-vector
// memory (m) and broadcast-memory (mb) variants. $src1 is tied to $dst, and
// each defm pattern hands the operands to OpNode in the order
// (src1, src3, src2).
6225 multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6226 X86VectorVTInfo _, string Suff> {
6227 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
6228 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6229 (ins _.RC:$src2, _.RC:$src3),
6230 OpcodeStr, "$src3, $src2", "$src2, $src3",
6231 (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
6234 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6235 (ins _.RC:$src2, _.MemOp:$src3),
6236 OpcodeStr, "$src3, $src2", "$src2, $src3",
6237 (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src3), _.RC:$src2)), 1, 0>,
6240 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6241 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6242 OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
6243 "$src2, ${src3}"##_.BroadcastStr,
6244 (_.VT (OpNode _.RC:$src1,
6245 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
6246 _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B;
6249 // Additional patterns for folding broadcast nodes in other orders.
// Here the broadcast is the first OpNode operand and $src1 the second; the
// masked form selects the mbk instruction variant.
6250 def : Pat<(_.VT (vselect _.KRCWM:$mask,
6251 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
6252 _.RC:$src1, _.RC:$src2),
6254 (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
6255 _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
// 132-form packed FMA with an explicit rounding-control operand: a single
// register-only defm (rb) that takes AVX512RC:$rc. The pattern passes
// (src1, src3, src2) to OpNode followed by $rc as an i32 immediate. $src1 is
// tied to $dst.
6258 multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
6259 X86VectorVTInfo _, string Suff> {
6260 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
6261 defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6262 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6263 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
6264 (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1>,
6265 AVX512FMA3Base, EVEX_B, EVEX_RC;
// Instantiates the 132-form packed FMA for the three vector widths described
// by the AVX512VLVectorVTInfo `_`. Only the 512-bit defm (Z, HasAVX512) mixes
// in avx512_fma3_132_round; Z256 and Z128 require HasVLX as well and use only
// avx512_fma3p_132_rm.
6268 multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6269 SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
6271 let Predicates = [HasAVX512] in {
6272 defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
6273 avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
6274 Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6276 let Predicates = [HasVLX, HasAVX512] in {
6277 defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
6278 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6279 defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
6280 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// Expands one 132-form FMA opcode into a PS family (avx512vl_f32_info) and a
// PD family (avx512vl_f64_info) by appending "ps"/"pd" to OpcodeStr. The PD
// family is additionally tagged with VEX_W.
6284 multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
6285 SDNode OpNodeRnd > {
6286 defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
6287 avx512vl_f32_info, "PS">;
6288 defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
6289 avx512vl_f64_info, "PD">, VEX_W;
// Packed 132-form FMA families. The template arguments are, in order: opcode
// byte, mnemonic stem, the plain SDNode, and the SDNode used for the
// rounding-control form.
6292 defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
6293 defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
6294 defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
6295 defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
6296 defm VFNMADD132 : avx512_fma3p_132_f<0x9C, "vfnmadd132", X86Fnmadd, X86FnmaddRnd>;
6297 defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86Fnmsub, X86FnmsubRnd>;
// Shared skeleton for one scalar FMA form. $src1 is tied to $dst for
// everything defined here.
//  - r_Int / m_Int / rb_Int operate on the vector register class _.RC and take
//    their patterns from RHS_VEC_r / RHS_VEC_m / RHS_VEC_rb; rb_Int carries an
//    extra AVX512RC:$rc operand.
//  - r / m operate on the scalar register class _.FRC and are marked
//    isCodeGenOnly and isCommutable.
6300 let Constraints = "$src1 = $dst" in {
6301 multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6302 dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb,
6303 dag RHS_r, dag RHS_m > {
6304 defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6305 (ins _.RC:$src2, _.RC:$src3), OpcodeStr,
6306 "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
6308 defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6309 (ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
6310 "$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
6312 defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6313 (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
6314 OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
6315 AVX512FMA3Base, EVEX_B, EVEX_RC;
6317 let isCodeGenOnly = 1, isCommutable = 1 in {
6318 def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
6319 (ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
6320 !strconcat(OpcodeStr,
6321 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6323 def m : AVX512FMA3<opc, MRMSrcMem, (outs _.FRC:$dst),
6324 (ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
6325 !strconcat(OpcodeStr,
6326 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
6328 }// isCodeGenOnly = 1
6330 }// Constraints = "$src1 = $dst"
// Builds the 213, 231 and 132 scalar FMA forms for one scalar type `_`. Each
// form is an avx512_fma3s_common instantiation named NAME#<form>#SUFF#Z whose
// mnemonic is OpcodeStr followed by the form number and _.Suffix. The 213 and
// 132 forms use OpNodeRnds1 for their vector (_.RC) patterns, the 231 form uses
// OpNodeRnds3; the scalar (_.FRC) patterns all use OpNode.
6332 multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6333 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
6334 SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
6335 let ExeDomain = _.ExeDomain in {
6336 defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
6337 // Operands for intrinsic are in 123 order to preserve passthru semantics.
6339 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
6340 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
6341 _.ScalarIntMemCPat:$src3, (i32 FROUND_CURRENT))),
6342 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
6344 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6346 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
6347 (_.ScalarLdFrag addr:$src3))))>;
6349 defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
6350 (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
6351 (_.VT (OpNodeRnds3 _.RC:$src2, _.ScalarIntMemCPat:$src3,
6352 _.RC:$src1, (i32 FROUND_CURRENT))),
6353 (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
6355 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
6357 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
6358 (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
6360 defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
6361 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
6362 (_.VT (OpNodeRnds1 _.RC:$src1, _.ScalarIntMemCPat:$src3,
6363 _.RC:$src2, (i32 FROUND_CURRENT))),
6364 (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
6366 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
6368 (set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1,
6369 (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>;
// Instantiates avx512_fma3s_all twice under HasAVX512: once for f32x_info with
// suffix "SS" (EVEX_CD8<32, CD8VT1>) and once for f64x_info with suffix "SD"
// (EVEX_CD8<64, CD8VT1>, plus VEX_W). Both are tagged VEX_LIG.
6373 multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
6374 string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
6375 SDNode OpNodeRnds3> {
6376 let Predicates = [HasAVX512] in {
6377 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6378 OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
6379 EVEX_CD8<32, CD8VT1>, VEX_LIG;
6380 defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
6381 OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
6382 EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
// Scalar FMA families. The three opcode bytes are given in 213, 231, 132
// order, matching the opc213/opc231/opc132 parameters of avx512_fma3s.
6386 defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
6388 defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
6390 defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
6391 X86FnmaddRnds1, X86FnmaddRnds3>;
6392 defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
6393 X86FnmsubRnds1, X86FnmsubRnds3>;
6395 //===----------------------------------------------------------------------===//
6396 // AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
6397 //===----------------------------------------------------------------------===//
// IFMA multiply-add for one vector type `_`: register (r), full-vector memory
// (m) and broadcast-memory (mb) variants. $src1 is tied to $dst. The r and m
// patterns pass (src1, src2, src3) to OpNode, with the third operand loaded
// through _.LdFrag in the m form; the mb form supplies an X86VBroadcast of a
// scalar load as the operand following $src2 and is tagged EVEX_B.
6398 let Constraints = "$src1 = $dst" in {
6399 multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
6400 X86VectorVTInfo _> {
6401 let ExeDomain = _.ExeDomain in {
6402 defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
6403 (ins _.RC:$src2, _.RC:$src3),
6404 OpcodeStr, "$src3, $src2", "$src2, $src3",
6405 (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
6408 defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6409 (ins _.RC:$src2, _.MemOp:$src3),
6410 OpcodeStr, "$src3, $src2", "$src2, $src3",
6411 (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
6414 defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
6415 (ins _.RC:$src2, _.ScalarMemOp:$src3),
6416 OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
6417 !strconcat("$src2, ${src3}", _.BroadcastStr ),
6419 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
6420 AVX512FMA3Base, EVEX_B;
6423 } // Constraints = "$src1 = $dst"
// Instantiates avx512_pmadd52_rm for the three vector widths in `_`. The
// 512-bit defm (Z) requires HasIFMA; the 256-bit and 128-bit defms (Z256/Z128)
// require both HasVLX and HasIFMA.
6425 multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
6426 AVX512VLVectorVTInfo _> {
6427 let Predicates = [HasIFMA] in {
6428 defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info512>,
6429 EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
6431 let Predicates = [HasVLX, HasIFMA] in {
6432 defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info256>,
6433 EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
6434 defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info128>,
6435 EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
// The two IFMA instruction families (opcodes 0xb4 and 0xb5). Both are defined
// over avx512vl_i64_info and tagged VEX_W.
6439 defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
6440 avx512vl_i64_info>, VEX_W;
6441 defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
6442 avx512vl_i64_info>, VEX_W;
6444 //===----------------------------------------------------------------------===//
6445 // AVX-512 Scalar convert from signed integer to float/double
6446 //===----------------------------------------------------------------------===//
// Scalar integer-to-FP conversion with an additional FP source operand.
//  - rr / rm use the scalar register class DstVT.FRC, have an empty pattern
//    list and are defined under hasSideEffects = 0.
//  - rr_Int / rm_Int use the vector register class DstVT.RC, are
//    isCodeGenOnly, and match OpNode with (i32 FROUND_CURRENT) as the rounding
//    argument; rm_Int loads its integer source through ld_frag.
6448 multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
6449 X86VectorVTInfo DstVT, X86MemOperand x86memop,
6450 PatFrag ld_frag, string asm> {
6451 let hasSideEffects = 0 in {
6452 def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
6453 (ins DstVT.FRC:$src1, SrcRC:$src),
6454 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
6457 def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst),
6458 (ins DstVT.FRC:$src1, x86memop:$src),
6459 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
6461 } // hasSideEffects = 0
6462 let isCodeGenOnly = 1 in {
6463 def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6464 (ins DstVT.RC:$src1, SrcRC:$src2),
6465 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6466 [(set DstVT.RC:$dst,
6467 (OpNode (DstVT.VT DstVT.RC:$src1),
6469 (i32 FROUND_CURRENT)))]>, EVEX_4V;
6471 def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst),
6472 (ins DstVT.RC:$src1, x86memop:$src2),
6473 !strconcat(asm,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
6474 [(set DstVT.RC:$dst,
6475 (OpNode (DstVT.VT DstVT.RC:$src1),
6476 (ld_frag addr:$src2),
6477 (i32 FROUND_CURRENT)))]>, EVEX_4V;
6478 }//isCodeGenOnly = 1
// Rounding-control variant of the scalar integer-to-FP conversion: a single
// register form (rrb_Int) on DstVT.RC that takes AVX512RC:$rc and passes it to
// OpNode as an i32 immediate. Tagged EVEX_4V, EVEX_B and EVEX_RC.
6481 multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
6482 X86VectorVTInfo DstVT, string asm> {
6483 def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
6484 (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
6486 "\t{$src2, $rc, $src1, $dst|$dst, $src1, $rc, $src2}"),
6487 [(set DstVT.RC:$dst,
6488 (OpNode (DstVT.VT DstVT.RC:$src1),
6490 (i32 imm:$rc)))]>, EVEX_4V, EVEX_B, EVEX_RC;
// Convenience wrapper that combines avx512_vcvtsi_round (rrb_Int) with
// avx512_vcvtsi (rr, rm, rr_Int, rm_Int) under a single NAME.
6493 multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
6494 X86VectorVTInfo DstVT, X86MemOperand x86memop,
6495 PatFrag ld_frag, string asm> {
6496 defm NAME : avx512_vcvtsi_round<opc, OpNode, SrcRC, DstVT, asm>,
6497 avx512_vcvtsi<opc, OpNode, SrcRC, DstVT, x86memop, ld_frag, asm>,
// Signed integer to float/double scalar conversions (opcode 0x2A). The
// variants with a 64-bit integer source use GR64/i64mem/loadi64 and add VEX_W.
6501 let Predicates = [HasAVX512] in {
6502 defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
6503 v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">,
6504 XS, EVEX_CD8<32, CD8VT1>;
6505 defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
6506 v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">,
6507 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6508 defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR32,
6509 v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">,
6510 XD, EVEX_CD8<32, CD8VT1>;
6511 defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
6512 v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
6513 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less "vcvtsi2ss" / "vcvtsi2sd" spellings with a memory source are
// aliased to the i32mem (rm) forms.
6515 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6516 (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6517 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6518 (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
// Select the scalar (FRC) conversions for generic sint_to_fp nodes, first for
// loaded integer sources (rm) and then for register sources (rr). The first
// instruction operand is an IMPLICIT_DEF of the result type.
6520 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
6521 (VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6522 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
6523 (VCVTSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6524 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
6525 (VCVTSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6526 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
6527 (VCVTSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6529 def : Pat<(f32 (sint_to_fp GR32:$src)),
6530 (VCVTSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6531 def : Pat<(f32 (sint_to_fp GR64:$src)),
6532 (VCVTSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6533 def : Pat<(f64 (sint_to_fp GR32:$src)),
6534 (VCVTSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6535 def : Pat<(f64 (sint_to_fp GR64:$src)),
6536 (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
// Unsigned integer to float/double scalar conversions (opcode 0x7B).
// VCVTUSI2SDZ is built from avx512_vcvtsi directly rather than
// avx512_vcvtsi_common, so it gets no rrb_Int (rounding-control) form and
// specifies VEX_LIG explicitly.
// NOTE(review): presumably because a 32-bit integer converts to f64 exactly,
// making rounding control meaningless - confirm.
6538 defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR32,
6539 v4f32x_info, i32mem, loadi32,
6540 "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>;
6541 defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
6542 v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">,
6543 XS, VEX_W, EVEX_CD8<64, CD8VT1>;
6544 defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, GR32, v2f64x_info,
6545 i32mem, loadi32, "cvtusi2sd{l}">,
6546 XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
6547 defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
6548 v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
6549 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Suffix-less "vcvtusi2ss" / "vcvtusi2sd" spellings with a memory source are
// aliased to the i32mem (rm) forms.
6551 def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
6552 (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
6553 def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
6554 (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
// Select the scalar (FRC) conversions for generic uint_to_fp nodes, first for
// loaded integer sources (rm) and then for register sources (rr). The first
// instruction operand is an IMPLICIT_DEF of the result type.
6556 def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
6557 (VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6558 def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
6559 (VCVTUSI642SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
6560 def : Pat<(f64 (uint_to_fp (loadi32 addr:$src))),
6561 (VCVTUSI2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6562 def : Pat<(f64 (uint_to_fp (loadi64 addr:$src))),
6563 (VCVTUSI642SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>;
6565 def : Pat<(f32 (uint_to_fp GR32:$src)),
6566 (VCVTUSI2SSZrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
6567 def : Pat<(f32 (uint_to_fp GR64:$src)),
6568 (VCVTUSI642SSZrr (f32 (IMPLICIT_DEF)), GR64:$src)>;
6569 def : Pat<(f64 (uint_to_fp GR32:$src)),
6570 (VCVTUSI2SDZrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
6571 def : Pat<(f64 (uint_to_fp GR64:$src)),
6572 (VCVTUSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>;
6575 //===----------------------------------------------------------------------===//
6576 // AVX-512 Scalar convert from float/double to integer
6577 //===----------------------------------------------------------------------===//
// Scalar FP-to-integer conversion taking the source as a vector register
// (SrcVT.RC) or a scalar memory operand, all under HasAVX512:
//  - rr matches OpNode with (i32 FROUND_CURRENT);
//  - rb takes AVX512RC:$rc and passes it to OpNode as an i32 immediate;
//  - rm matches a memory source through SrcVT.ScalarIntMemCPat with
//    (i32 FROUND_CURRENT).
6578 multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT ,
6579 X86VectorVTInfo DstVT, SDNode OpNode, string asm> {
6580 let Predicates = [HasAVX512] in {
6581 def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
6582 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6583 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>,
6585 def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc),
6586 !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"),
6587 [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>,
6588 EVEX, VEX_LIG, EVEX_B, EVEX_RC;
6589 def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src),
6590 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6591 [(set DstVT.RC:$dst, (OpNode
6592 (SrcVT.VT SrcVT.ScalarIntMemCPat:$src),
6593 (i32 FROUND_CURRENT)))]>,
6595 } // Predicates = [HasAVX512]
6598 // Convert float/double to signed/unsigned int 32/64
// Signed conversions use opcode 0x2D with X86cvts2si; unsigned conversions use
// opcode 0x79 with X86cvts2usi. Variants producing a 64-bit integer
// (i64x_info) add VEX_W. The EVEX_CD8 element size follows the source type
// (32 for SS, 64 for SD).
6599 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,
6600 X86cvts2si, "cvtss2si">,
6601 XS, EVEX_CD8<32, CD8VT1>;
6602 defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info,
6603 X86cvts2si, "cvtss2si">,
6604 XS, VEX_W, EVEX_CD8<32, CD8VT1>;
6605 defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info,
6606 X86cvts2usi, "cvtss2usi">,
6607 XS, EVEX_CD8<32, CD8VT1>;
6608 defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info,
6609 X86cvts2usi, "cvtss2usi">, XS, VEX_W,
6610 EVEX_CD8<32, CD8VT1>;
6611 defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info,
6612 X86cvts2si, "cvtsd2si">,
6613 XD, EVEX_CD8<64, CD8VT1>;
6614 defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info,
6615 X86cvts2si, "cvtsd2si">,
6616 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
6617 defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info,
6618 X86cvts2usi, "cvtsd2usi">,
6619 XD, EVEX_CD8<64, CD8VT1>;
6620 defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
6621 X86cvts2usi, "cvtsd2usi">, XD, VEX_W,
6622 EVEX_CD8<64, CD8VT1>;
6624 // The SSE versions of these instructions are disabled for AVX512.
6625 // Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
// Register sources select the rr forms; sse_load_f32 / sse_load_f64 sources
// select the rm forms.
6626 let Predicates = [HasAVX512] in {
6627 def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
6628 (VCVTSS2SIZrr VR128X:$src)>;
6629 def : Pat<(i32 (int_x86_sse_cvtss2si sse_load_f32:$src)),
6630 (VCVTSS2SIZrm sse_load_f32:$src)>;
6631 def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
6632 (VCVTSS2SI64Zrr VR128X:$src)>;
6633 def : Pat<(i64 (int_x86_sse_cvtss2si64 sse_load_f32:$src)),
6634 (VCVTSS2SI64Zrm sse_load_f32:$src)>;
6635 def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
6636 (VCVTSD2SIZrr VR128X:$src)>;
6637 def : Pat<(i32 (int_x86_sse2_cvtsd2si sse_load_f64:$src)),
6638 (VCVTSD2SIZrm sse_load_f64:$src)>;
6639 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
6640 (VCVTSD2SI64Zrr VR128X:$src)>;
6641 def : Pat<(i64 (int_x86_sse2_cvtsd2si64 sse_load_f64:$src)),
6642 (VCVTSD2SI64Zrm sse_load_f64:$src)>;
// Map the SSE/SSE2 cvtsi2ss / cvtsi2sd intrinsics and the AVX-512 cvtusi2sd
// intrinsic to the vector-register (_Int) instruction forms: rr_Int for
// register integer sources, rm_Int for loadi32 / loadi64 sources.
6645 let Predicates = [HasAVX512] in {
6646 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
6647 (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
6648 def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
6649 (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
6650 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
6651 (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
6652 def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
6653 (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
6654 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
6655 (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6656 def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
6657 (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6658 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
6659 (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
6660 def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
6661 (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
6662 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
6663 (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
6664 def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
6665 (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
6666 } // Predicates = [HasAVX512]
6668 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
6669 // which produce unnecessary vmovs{s,d} instructions
// Each pattern matches X86Movss / X86Movsd of $dst with a scalar_to_vector of
// a sint_to_fp result and selects the corresponding rr_Int instruction with
// $dst as its first operand.
6670 let Predicates = [HasAVX512] in {
6671 def : Pat<(v4f32 (X86Movss
6672 (v4f32 VR128X:$dst),
6673 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR64:$src)))))),
6674 (VCVTSI642SSZrr_Int VR128X:$dst, GR64:$src)>;
6676 def : Pat<(v4f32 (X86Movss
6677 (v4f32 VR128X:$dst),
6678 (v4f32 (scalar_to_vector (f32 (sint_to_fp GR32:$src)))))),
6679 (VCVTSI2SSZrr_Int VR128X:$dst, GR32:$src)>;
6681 def : Pat<(v2f64 (X86Movsd
6682 (v2f64 VR128X:$dst),
6683 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR64:$src)))))),
6684 (VCVTSI642SDZrr_Int VR128X:$dst, GR64:$src)>;
6686 def : Pat<(v2f64 (X86Movsd
6687 (v2f64 VR128X:$dst),
6688 (v2f64 (scalar_to_vector (f64 (sint_to_fp GR32:$src)))))),
6689 (VCVTSI2SDZrr_Int VR128X:$dst, GR32:$src)>;
6690 } // Predicates = [HasAVX512]
6692 // Convert float/double to signed/unsigned int 32/64 with truncation
//  - rr / rm take the source as a scalar register (_SrcRC.FRC) or scalar
//    memory operand and match OpNode; rb is the "{sae}" register form and is
//    defined under hasSideEffects = 0.
//  - The three InstAliases accept the mnemonic with aliasStr appended and map
//    it onto rr, rb and rm.
//  - rr_Int / rb_Int / rm_Int are isCodeGenOnly and take the source as a
//    vector register (_SrcRC.RC) or IntScalarMemOp. rr_Int matches OpNodeRnd
//    with FROUND_CURRENT, rb_Int matches it with FROUND_NO_EXC.
6693 multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
6694 X86VectorVTInfo _DstRC, SDNode OpNode,
6695 SDNode OpNodeRnd, string aliasStr>{
6696 let Predicates = [HasAVX512] in {
6697 def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6698 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6699 [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
6700 let hasSideEffects = 0 in
6701 def rb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
6702 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6704 def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
6705 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6706 [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
6709 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6710 (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6711 def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
6712 (!cast<Instruction>(NAME # "rb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
6713 def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
6714 (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
6715 _SrcRC.ScalarMemOp:$src), 0>;
6717 let isCodeGenOnly = 1 in {
6718 def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6719 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6720 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6721 (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
6722 def rb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
6723 !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
6724 [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
6725 (i32 FROUND_NO_EXC)))]>,
6726 EVEX,VEX_LIG , EVEX_B;
6727 let mayLoad = 1, hasSideEffects = 0 in
6728 def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
6729 (ins _SrcRC.IntScalarMemOp:$src),
6730 !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
6733 } // isCodeGenOnly = 1
// Truncating scalar FP-to-integer conversions. Signed variants use opcode 0x2C
// with fp_to_sint / X86cvtts2IntRnd; unsigned variants use opcode 0x78 with
// fp_to_uint / X86cvtts2UIntRnd. The final string argument is the alias suffix
// ("{l}" for 32-bit results, "{q}" for 64-bit results, which also add VEX_W).
6738 defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
6739 fp_to_sint, X86cvtts2IntRnd, "{l}">,
6740 XS, EVEX_CD8<32, CD8VT1>;
6741 defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
6742 fp_to_sint, X86cvtts2IntRnd, "{q}">,
6743 VEX_W, XS, EVEX_CD8<32, CD8VT1>;
6744 defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
6745 fp_to_sint, X86cvtts2IntRnd, "{l}">,
6746 XD, EVEX_CD8<64, CD8VT1>;
6747 defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
6748 fp_to_sint, X86cvtts2IntRnd, "{q}">,
6749 VEX_W, XD, EVEX_CD8<64, CD8VT1>;
6751 defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
6752 fp_to_uint, X86cvtts2UIntRnd, "{l}">,
6753 XS, EVEX_CD8<32, CD8VT1>;
6754 defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
6755 fp_to_uint, X86cvtts2UIntRnd, "{q}">,
6756 XS,VEX_W, EVEX_CD8<32, CD8VT1>;
6757 defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
6758 fp_to_uint, X86cvtts2UIntRnd, "{l}">,
6759 XD, EVEX_CD8<64, CD8VT1>;
6760 defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
6761 fp_to_uint, X86cvtts2UIntRnd, "{q}">,
6762 XD, VEX_W, EVEX_CD8<64, CD8VT1>;
// Map the SSE/SSE2 truncating-conversion intrinsics to the AVX-512 _Int
// instruction forms: rr_Int for register sources, rm_Int (with an ssmem /
// sdmem operand) for sse_load_f32 / sse_load_f64 sources.
6763 let Predicates = [HasAVX512] in {
6764 def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
6765 (VCVTTSS2SIZrr_Int VR128X:$src)>;
6766 def : Pat<(i32 (int_x86_sse_cvttss2si sse_load_f32:$src)),
6767 (VCVTTSS2SIZrm_Int ssmem:$src)>;
6768 def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
6769 (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
6770 def : Pat<(i64 (int_x86_sse_cvttss2si64 sse_load_f32:$src)),
6771 (VCVTTSS2SI64Zrm_Int ssmem:$src)>;
6772 def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
6773 (VCVTTSD2SIZrr_Int VR128X:$src)>;
6774 def : Pat<(i32 (int_x86_sse2_cvttsd2si sse_load_f64:$src)),
6775 (VCVTTSD2SIZrm_Int sdmem:$src)>;
6776 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
6777 (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
6778 def : Pat<(i64 (int_x86_sse2_cvttsd2si64 sse_load_f64:$src)),
6779 (VCVTTSD2SI64Zrm_Int sdmem:$src)>;
6781 //===----------------------------------------------------------------------===//
6782 // AVX-512 Convert from float to double and back
6783 //===----------------------------------------------------------------------===//
// Scalar FP-to-FP conversion from type _Src to type _.
//  - rr_Int / rm_Int are maskable forms on the vector register class _.RC that
//    match OpNode with (i32 FROUND_CURRENT); rm_Int takes its second source
//    through _Src.ScalarIntMemCPat.
//  - rr / rm are isCodeGenOnly, hasSideEffects = 0 forms on the scalar register
//    class _.FRC with an empty pattern list.
// Register forms are scheduled as WriteCvtF2F; memory forms as WriteCvtF2FLd
// with ReadAfterLd.
6784 multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6785 X86VectorVTInfo _Src, SDNode OpNode> {
6786 defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6787 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6788 "$src2, $src1", "$src1, $src2",
6789 (_.VT (OpNode (_.VT _.RC:$src1),
6790 (_Src.VT _Src.RC:$src2),
6791 (i32 FROUND_CURRENT)))>,
6792 EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
6793 defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
6794 (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr,
6795 "$src2, $src1", "$src1, $src2",
6796 (_.VT (OpNode (_.VT _.RC:$src1),
6797 (_Src.VT _Src.ScalarIntMemCPat:$src2),
6798 (i32 FROUND_CURRENT)))>,
6799 EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
6801 let isCodeGenOnly = 1, hasSideEffects = 0 in {
6802 def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst),
6803 (ins _.FRC:$src1, _Src.FRC:$src2),
6804 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
6805 EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
6807 def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst),
6808 (ins _.FRC:$src1, _Src.ScalarMemOp:$src2),
6809 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
6810 EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
6814 // Scalar Conversion with SAE - suppress all exceptions
// Register-only maskable form (rrb_Int) whose assembly string carries "{sae}"
// and whose pattern passes (i32 FROUND_NO_EXC) to OpNodeRnd. Tagged EVEX_B.
6815 multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6816 X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6817 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6818 (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
6819 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
6820 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6821 (_Src.VT _Src.RC:$src2),
6822 (i32 FROUND_NO_EXC)))>,
6823 EVEX_4V, VEX_LIG, EVEX_B;
6826 // Scalar Conversion with rounding control (RC)
// Register-only maskable form (rrb_Int) that takes an explicit AVX512RC:$rc
// operand and forwards it to OpNodeRnd as an i32 immediate. The instruction is
// MRMSrcReg and has no memory operand, so it is scheduled with the register
// class WriteCvtF2F (as rr_Int in avx512_cvt_fp_scalar is) rather than the
// load-folding pair WriteCvtF2FLd/ReadAfterLd.
6827 multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6828 X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6829 defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
6830 (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr,
6831 "$rc, $src2, $src1", "$src1, $src2, $rc",
6832 (_.VT (OpNodeRnd (_.VT _.RC:$src1),
6833 (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>,
6834 EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>,
// Double-to-single scalar conversion under HasAVX512: combines the base forms
// (avx512_cvt_fp_scalar) with the rounding-control form
// (avx512_cvt_fp_rc_scalar). Note that the parameters are declared as
// (_src, _dst) but handed to the inner multiclasses as (_dst, _src). Tagged
// VEX_W, EVEX_CD8<64, CD8VT1> and XD.
6837 multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
6838 SDNode OpNodeRnd, X86VectorVTInfo _src,
6839 X86VectorVTInfo _dst> {
6840 let Predicates = [HasAVX512] in {
6841 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
6842 avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
6843 OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
// Single-to-double scalar conversion under HasAVX512: combines the base forms
// (avx512_cvt_fp_scalar) with the SAE form (avx512_cvt_fp_sae_scalar) rather
// than a rounding-control form. The parameters are declared as (_src, _dst)
// but handed to the inner multiclasses as (_dst, _src). Tagged
// EVEX_CD8<32, CD8VT1> and XS.
6847 multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
6848 SDNode OpNodeRnd, X86VectorVTInfo _src,
6849 X86VectorVTInfo _dst> {
6850 let Predicates = [HasAVX512] in {
6851 defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
6852 avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
6853 EVEX_CD8<32, CD8VT1>, XS;
// The two scalar FP-to-FP conversions share opcode 0x5A. Arguments are the
// rounding SDNode followed by the source and destination type infos.
6856 defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
6857 X86froundRnd, f64x_info, f32x_info>;
6858 defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
6859 X86fpextRnd,f32x_info, f64x_info >;
// Selection patterns for generic fpextend / fpround on scalars.
// Register sources: the source value, re-classed with COPY_TO_REGCLASS, is
// also supplied as the first instruction operand.
6861 def : Pat<(f64 (fpextend FR32X:$src)),
6862 (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
6863 Requires<[HasAVX512]>;
6864 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
6865 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6866 Requires<[HasAVX512]>;
// extloadf32 is selected differently by optimization goal: with OptForSize the
// load is folded into VCVTSS2SDZrm; with OptForSpeed the value is first loaded
// with VMOVSSZrm and then converted with VCVTSS2SDZrr.
6868 def : Pat<(f64 (extloadf32 addr:$src)),
6869 (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
6870 Requires<[HasAVX512, OptForSize]>;
6872 def : Pat<(f64 (extloadf32 addr:$src)),
6873 (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>,
6874 Requires<[HasAVX512, OptForSpeed]>;
6876 def : Pat<(f32 (fpround FR64X:$src)),
6877 (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
6878 Requires<[HasAVX512]>;
// X86Movss / X86Movsd of $dst with a scalar_to_vector of a converted element 0
// of $src is selected as a single rr_Int conversion with $dst as the first
// operand.
6880 def : Pat<(v4f32 (X86Movss
6881 (v4f32 VR128X:$dst),
6882 (v4f32 (scalar_to_vector
6883 (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
6884 (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
6885 Requires<[HasAVX512]>;
6887 def : Pat<(v2f64 (X86Movsd
6888 (v2f64 VR128X:$dst),
6889 (v2f64 (scalar_to_vector
6890 (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
6891 (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
6892 Requires<[HasAVX512]>;
6894 //===----------------------------------------------------------------------===//
6895 // AVX-512 Vector convert from signed/unsigned integer to float/double
6896 // and from float/double to signed/unsigned integer
6897 //===----------------------------------------------------------------------===//
// Common maskable forms of a packed conversion:
//   rr  - register source
//   rm  - full-vector load (_Src.LdFrag, bitconverted to _Src.VT)
//   rmb - broadcast of a scalar load (X86VBroadcast of _Src.ScalarLdFrag)
// Parameters:
//   _         - destination vector type info
//   _Src      - source vector type info
//   OpNode    - selection node applied to the source
//   Broadcast - suffix appended to "${src}" in the rmb asm strings
//               (defaults to _.BroadcastStr)
//   Alias     - suffix appended to the mnemonic of the rm form only
//   MemOp     - memory operand of the rm form (defaults to _Src.MemOp)
6899 multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6900 X86VectorVTInfo _Src, SDNode OpNode,
6901 string Broadcast = _.BroadcastStr,
6902 string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
6904 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6905 (ins _Src.RC:$src), OpcodeStr, "$src", "$src",
6906 (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
6908 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6909 (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
6910 (_.VT (OpNode (_Src.VT
6911 (bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
6913 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
6914 (ins _Src.ScalarMemOp:$src), OpcodeStr,
6915 "${src}"##Broadcast, "${src}"##Broadcast,
6916 (_.VT (OpNode (_Src.VT
6917 (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
6920 // Conversion with SAE - suppress all exceptions
// Defines rrb, a register form printed with "{sae}"; its pattern passes
// FROUND_NO_EXC as the second operand of OpNodeRnd.
6921 multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6922 X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6923 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6924 (ins _Src.RC:$src), OpcodeStr,
6925 "{sae}, $src", "$src, {sae}",
6926 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src),
6927 (i32 FROUND_NO_EXC)))>,
6931 // Conversion with rounding control (RC)
// Defines rrb, a register form with an explicit AVX512RC:$rc operand that is
// forwarded to OpNodeRnd as an i32 immediate.
6932 multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
6933 X86VectorVTInfo _Src, SDNode OpNodeRnd> {
6934 defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
6935 (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr,
6936 "$rc, $src", "$src, $rc",
6937 (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>,
6938 EVEX, EVEX_B, EVEX_RC;
6941 // Extend Float to Double
// Z (under HasAVX512) combines the common forms with the {sae} form.
// Z128/Z256 are under HasVLX; Z128 uses X86vfpext and passes its broadcast
// string ("{1to2}") and memory operand (f64mem) explicitly.
6942 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
6943 let Predicates = [HasAVX512] in {
6944 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fpextend>,
6945 avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
6946 X86vfpextRnd>, EVEX_V512;
6948 let Predicates = [HasVLX] in {
6949 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
6950 X86vfpext, "{1to2}", "", f64mem>, EVEX_V128;
6951 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend>,
6956 // Truncate Double to Float
// Z (under HasAVX512) combines the common forms with the rounding-control
// form. Z128 and Z256 (under HasVLX) both use v4f32x_info as destination, so
// their rm forms get "{x}" / "{y}" mnemonic suffixes and explicit broadcast
// strings.
6957 multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
6958 let Predicates = [HasAVX512] in {
6959 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround>,
6960 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
6961 X86vfproundRnd>, EVEX_V512;
6963 let Predicates = [HasVLX] in {
6964 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
6965 X86vfpround, "{1to2}", "{x}">, EVEX_V128;
6966 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
6967 "{1to4}", "{y}">, EVEX_V256;
// Aliases mapping the "x"/"y"-suffixed mnemonic onto the Z128/Z256 register
// and full-vector memory forms.
6969 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6970 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
6971 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
6972 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
6973 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6974 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
6975 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
6976 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
// Packed double<->single conversions, both at opcode 0x5A.
6980 defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps">,
6981 VEX_W, PD, EVEX_CD8<64, CD8VF>;
6982 defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd">,
6983 PS, EVEX_CD8<32, CD8VH>;
// Extending vector loads of f32 elements select the memory forms of
// VCVTPS2PD.
// NOTE(review): no Predicates wrapper is visible on this first pattern -
// confirm that is intended.
6985 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
6986 (VCVTPS2PDZrm addr:$src)>;
6988 let Predicates = [HasVLX] in {
// A zero-extending move of the 128-bit pd->ps result is selected as the
// conversion alone.
6989 let AddedComplexity = 15 in
6990 def : Pat<(X86vzmovl (v2f64 (bitconvert
6991 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
6992 (VCVTPD2PSZ128rr VR128X:$src)>;
6993 def : Pat<(v2f64 (extloadv2f32 addr:$src)),
6994 (VCVTPS2PDZ128rm addr:$src)>;
6995 def : Pat<(v4f64 (extloadv4f32 addr:$src)),
6996 (VCVTPS2PDZ256rm addr:$src)>;
6999 // Convert Signed/Unsigned Doubleword to Double
// Z is under HasAVX512, Z128/Z256 under HasVLX. Z128 uses OpNode128 and passes
// its broadcast string ("{1to2}") and memory operand (i64mem) explicitly.
7000 multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
7002 // No rounding in this op
7003 let Predicates = [HasAVX512] in
7004 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode>,
7007 let Predicates = [HasVLX] in {
7008 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
7009 OpNode128, "{1to2}", "", i64mem>, EVEX_V128;
7010 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
7015 // Convert Signed/Unsigned Doubleword to Float
// Only the 512-bit Z definition adds the rounding-control form
// (avx512_vcvt_fp_rc); Z128/Z256 are under HasVLX.
7016 multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7018 let Predicates = [HasAVX512] in
7019 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode>,
7020 avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info,
7021 OpNodeRnd>, EVEX_V512;
7023 let Predicates = [HasVLX] in {
7024 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode>,
7026 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode>,
7031 // Convert Float to Signed/Unsigned Doubleword with truncation
// Only the 512-bit Z definition adds the {sae} form (avx512_vcvt_fp_sae);
// Z128/Z256 are under HasVLX.
7032 multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr,
7033 SDNode OpNode, SDNode OpNodeRnd> {
7034 let Predicates = [HasAVX512] in {
7035 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
7036 avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info,
7037 OpNodeRnd>, EVEX_V512;
7039 let Predicates = [HasVLX] in {
7040 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
7042 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
7047 // Convert Float to Signed/Unsigned Doubleword
// Only the 512-bit Z definition adds the rounding-control form
// (avx512_vcvt_fp_rc); Z128/Z256 are under HasVLX.
7048 multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
7049 SDNode OpNode, SDNode OpNodeRnd> {
7050 let Predicates = [HasAVX512] in {
7051 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode>,
7052 avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info,
7053 OpNodeRnd>, EVEX_V512;
7055 let Predicates = [HasVLX] in {
7056 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode>,
7058 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode>,
7063 // Convert Double to Signed/Unsigned Doubleword with truncation
// Z adds the {sae} form; Z128 uses OpNode128 while Z and Z256 use OpNode.
7064 multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7065 SDNode OpNode128, SDNode OpNodeRnd> {
7066 let Predicates = [HasAVX512] in {
7067 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
7068 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
7069 OpNodeRnd>, EVEX_V512;
7071 let Predicates = [HasVLX] in {
7072 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
7073 // memory forms of these instructions in Asm Parser. They have the same
7074 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7075 // due to the same reason.
7076 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
7077 OpNode128, "{1to2}", "{x}">, EVEX_V128;
7078 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7079 "{1to4}", "{y}">, EVEX_V256;
// Aliases mapping the "x"/"y"-suffixed mnemonic onto the Z128/Z256 register
// and full-vector memory forms.
7081 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7082 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7083 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7084 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7085 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7086 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7087 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7088 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
7092 // Convert Double to Signed/Unsigned Doubleword
// Z adds the rounding-control form; all three lengths use the same OpNode.
7093 multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
7094 SDNode OpNode, SDNode OpNodeRnd> {
7095 let Predicates = [HasAVX512] in {
7096 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
7097 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info,
7098 OpNodeRnd>, EVEX_V512;
7100 let Predicates = [HasVLX] in {
7101 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
7102 // memory forms of these instructions in Asm Parser. They have the same
7103 // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
7104 // due to the same reason.
7105 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
7106 "{1to2}", "{x}">, EVEX_V128;
7107 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
7108 "{1to4}", "{y}">, EVEX_V256;
// Aliases mapping the "x"/"y"-suffixed mnemonic onto the Z128/Z256 register
// and full-vector memory forms.
7110 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7111 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7112 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7113 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
7114 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7115 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7116 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7117 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
7121 // Convert Double to Signed/Unsigned Quadword
// The 512-bit form (with its rounding-control variant) is under [HasDQI];
// the 128/256-bit forms are under [HasDQI, HasVLX].
7122 multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr,
7123 SDNode OpNode, SDNode OpNodeRnd> {
7124 let Predicates = [HasDQI] in {
7125 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
7126 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info,
7127 OpNodeRnd>, EVEX_V512;
7129 let Predicates = [HasDQI, HasVLX] in {
7130 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
7132 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
7137 // Convert Double to Signed/Unsigned Quadword with truncation
// The 512-bit form (with its {sae} variant) is under [HasDQI]; the 128/256-bit
// forms are under [HasDQI, HasVLX].
7138 multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr,
7139 SDNode OpNode, SDNode OpNodeRnd> {
7140 let Predicates = [HasDQI] in {
7141 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode>,
7142 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info,
7143 OpNodeRnd>, EVEX_V512;
7145 let Predicates = [HasDQI, HasVLX] in {
7146 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode>,
7148 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode>,
7153 // Convert Signed/Unsigned Quadword to Double
// The 512-bit form (with its rounding-control variant) is under [HasDQI];
// the 128/256-bit forms are under [HasDQI, HasVLX].
7154 multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr,
7155 SDNode OpNode, SDNode OpNodeRnd> {
7156 let Predicates = [HasDQI] in {
7157 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode>,
7158 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info,
7159 OpNodeRnd>, EVEX_V512;
7161 let Predicates = [HasDQI, HasVLX] in {
7162 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode>,
7164 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode>,
7169 // Convert Float to Signed/Unsigned Quadword
// The 512-bit form (with its rounding-control variant) is under [HasDQI];
// the 128/256-bit forms are under [HasDQI, HasVLX].
7170 multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
7171 SDNode OpNode, SDNode OpNodeRnd> {
7172 let Predicates = [HasDQI] in {
7173 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
7174 avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info,
7175 OpNodeRnd>, EVEX_V512;
7177 let Predicates = [HasDQI, HasVLX] in {
7178 // Explicitly specified broadcast string, since we take only 2 elements
7179 // from v4f32x_info source
7180 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
7181 "{1to2}", "", f64mem>, EVEX_V128;
7182 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
7187 // Convert Float to Signed/Unsigned Quadword with truncation
// The 512-bit form (with its {sae} variant) is under [HasDQI]; the 128/256-bit
// forms are under [HasDQI, HasVLX]. Z128 uses OpNode128, the others OpNode.
7188 multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
7189 SDNode OpNode128, SDNode OpNodeRnd> {
7190 let Predicates = [HasDQI] in {
7191 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
7192 avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
7193 OpNodeRnd>, EVEX_V512;
7195 let Predicates = [HasDQI, HasVLX] in {
7196 // Explicitly specified broadcast string, since we take only 2 elements
7197 // from v4f32x_info source
7198 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
7199 "{1to2}", "", f64mem>, EVEX_V128;
7200 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
7205 // Convert Signed/Unsigned Quadword to Float
// The 512-bit form (with its rounding-control variant) is under [HasDQI];
// the 128/256-bit forms are under [HasDQI, HasVLX]. Z128 uses OpNode128.
7206 multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
7207 SDNode OpNode128, SDNode OpNodeRnd> {
7208 let Predicates = [HasDQI] in {
7209 defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
7210 avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
7211 OpNodeRnd>, EVEX_V512;
7213 let Predicates = [HasDQI, HasVLX] in {
7214 // We need "x"/"y" suffixes in order to distinguish between 128 and 256
7215 // memory forms of these instructions in Asm Parser. They have the same
7216 // dest type - 'v4f32x_info'. We also specify the broadcast string explicitly
7217 // due to the same reason.
7218 defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
7219 "{1to2}", "{x}">, EVEX_V128;
7220 defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
7221 "{1to4}", "{y}">, EVEX_V256;
// Aliases mapping the "x"/"y"-suffixed mnemonic onto the Z128/Z256 register
// and full-vector memory forms.
7223 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7224 (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
7225 def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
7226 (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
7227 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7228 (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
7229 def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
7230 (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
// Instantiations of the packed integer<->floating-point conversion
// multiclasses. Each passes the opcode, the mnemonic and the selection
// node(s), followed by the prefix, VEX_W and EVEX_CD8 attributes.
7234 defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP>,
7235 XS, EVEX_CD8<32, CD8VH>;
7237 defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
7239 PS, EVEX_CD8<32, CD8VF>;
7241 defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
7243 XS, EVEX_CD8<32, CD8VF>;
7245 defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
7247 PD, VEX_W, EVEX_CD8<64, CD8VF>;
7249 defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
7250 X86cvttp2uiRnd>, PS,
7251 EVEX_CD8<32, CD8VF>;
7253 defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
7254 X86cvttp2ui, X86cvttp2uiRnd>, PS, VEX_W,
7255 EVEX_CD8<64, CD8VF>;
7257 defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP>,
7258 XS, EVEX_CD8<32, CD8VH>;
7260 defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
7261 X86VUintToFpRnd>, XD,
7262 EVEX_CD8<32, CD8VF>;
7264 defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int,
7265 X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VF>;
7267 defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int,
7268 X86cvtp2IntRnd>, XD, VEX_W,
7269 EVEX_CD8<64, CD8VF>;
7271 defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt,
7273 PS, EVEX_CD8<32, CD8VF>;
7274 defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt,
7275 X86cvtp2UIntRnd>, VEX_W,
7276 PS, EVEX_CD8<64, CD8VF>;
7278 defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int,
7279 X86cvtp2IntRnd>, VEX_W,
7280 PD, EVEX_CD8<64, CD8VF>;
7282 defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int,
7283 X86cvtp2IntRnd>, PD, EVEX_CD8<32, CD8VH>;
7285 defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt,
7286 X86cvtp2UIntRnd>, VEX_W,
7287 PD, EVEX_CD8<64, CD8VF>;
7289 defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
7290 X86cvtp2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
7292 defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
7293 X86cvttp2siRnd>, VEX_W,
7294 PD, EVEX_CD8<64, CD8VF>;
7296 defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
7297 X86cvttp2siRnd>, PD, EVEX_CD8<32, CD8VH>;
7299 defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
7300 X86cvttp2uiRnd>, VEX_W,
7301 PD, EVEX_CD8<64, CD8VF>;
7303 defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
7304 X86cvttp2uiRnd>, PD, EVEX_CD8<32, CD8VH>;
7306 defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
7307 X86VSintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
7309 defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
7310 X86VUintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
7312 defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
7313 X86VSintToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
7315 defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
7316 X86VUintToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
// Without VLX, 128/256-bit unsigned conversions are selected onto the 512-bit
// instructions: the source is inserted into an undefined wider register
// (INSERT_SUBREG of IMPLICIT_DEF) and the needed sub-register of the result is
// extracted.
7318 let Predicates = [HasAVX512, NoVLX] in {
7319 def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
7320 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7321 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7322 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7324 def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
7325 (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
7326 (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
7327 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7329 def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
7330 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7331 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7332 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7334 def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
7335 (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
7336 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7337 VR128X:$src, sub_xmm)))), sub_xmm)>;
7339 def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
7340 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7341 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7342 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7344 def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
7345 (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
7346 (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
7347 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7349 def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
7350 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7351 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7352 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7354 def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
7355 (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
7356 (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
7357 VR128X:$src1, sub_xmm)))), sub_xmm)>;
// With VLX, an X86vzmovl wrapped around the 128-bit double->doubleword
// conversion result is selected as the Z128 register form alone
// (AddedComplexity = 15 raises the priority of these patterns).
7360 let Predicates = [HasAVX512, HasVLX] in {
7361 let AddedComplexity = 15 in {
7362 def : Pat<(X86vzmovl (v2i64 (bitconvert
7363 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
7364 (VCVTPD2DQZ128rr VR128X:$src)>;
7365 def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
7366 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
7367 (VCVTPD2UDQZ128rr VR128X:$src)>;
7368 def : Pat<(X86vzmovl (v2i64 (bitconvert
7369 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
7370 (VCVTTPD2DQZ128rr VR128X:$src)>;
7371 def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
7372 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))),
7373 (VCVTTPD2UDQZ128rr VR128X:$src)>;
// Fold full-vector loads into the 512-bit pd->ps and ps->pd conversions.
7377 let Predicates = [HasAVX512] in {
7378 def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
7379 (VCVTPD2PSZrm addr:$src)>;
7380 def : Pat<(v8f64 (extloadv8f32 addr:$src)),
7381 (VCVTPS2PDZrm addr:$src)>;
// With DQI and VLX, an X86vzmovl wrapped around the 128-bit quadword->float
// conversion result is selected as the Z128 register form alone.
7384 let Predicates = [HasDQI, HasVLX] in {
7385 let AddedComplexity = 15 in {
7386 def : Pat<(X86vzmovl (v2f64 (bitconvert
7387 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
7388 (VCVTQQ2PSZ128rr VR128X:$src)>;
7389 def : Pat<(X86vzmovl (v2f64 (bitconvert
7390 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
7391 (VCVTUQQ2PSZ128rr VR128X:$src)>;
// With DQI but without VLX, 128/256-bit quadword conversions are selected onto
// the 512-bit instructions: the source is inserted into an undefined wider
// register (INSERT_SUBREG of IMPLICIT_DEF) and the needed sub-register of the
// result is extracted.
7395 let Predicates = [HasDQI, NoVLX] in {
7396 def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
7397 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7398 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7399 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7401 def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
7402 (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
7403 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7404 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7406 def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
7407 (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
7408 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7409 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7411 def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
7412 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7413 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7414 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7416 def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
7417 (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
7418 (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
7419 VR128X:$src1, sub_xmm)))), sub_ymm)>;
7421 def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
7422 (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
7423 (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
7424 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7426 def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
7427 (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
7428 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7429 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7431 def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
7432 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7433 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7434 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7436 def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
7437 (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
7438 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7439 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7441 def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
7442 (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
7443 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7444 VR256X:$src1, sub_ymm)))), sub_xmm)>;
7446 def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
7447 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7448 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7449 VR128X:$src1, sub_xmm)))), sub_xmm)>;
7451 def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
7452 (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
7453 (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
7454 VR256X:$src1, sub_ymm)))), sub_ymm)>;
7457 //===----------------------------------------------------------------------===//
7458 // Half precision conversion instructions
7459 //===----------------------------------------------------------------------===//
// vcvtph2ps (opcode 0x13), maskable register (rr) and memory (rm) forms. Both
// patterns use X86cvtph2ps with FROUND_CURRENT; the rm form loads through
// ld_frag and bitconverts the result to _src.VT.
7460 multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7461 X86MemOperand x86memop, PatFrag ld_frag> {
7462 defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
7463 "vcvtph2ps", "$src", "$src",
7464 (X86cvtph2ps (_src.VT _src.RC:$src),
7465 (i32 FROUND_CURRENT))>, T8PD;
7466 defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src),
7467 "vcvtph2ps", "$src", "$src",
7468 (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))),
7469 (i32 FROUND_CURRENT))>, T8PD;
// {sae} register form (rb) of vcvtph2ps: same node as the plain forms, but
// with FROUND_NO_EXC as the rounding operand and EVEX_B set.
7472 multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
7473 defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src),
7474 "vcvtph2ps", "{sae}, $src", "$src, {sae}",
7475 (X86cvtph2ps (_src.VT _src.RC:$src),
7476 (i32 FROUND_NO_EXC))>, T8PD, EVEX_B;
// vcvtph2ps instantiations. The 512-bit definition also gets the {sae} form;
// the 256/128-bit definitions are wrapped in Predicates = [HasVLX]. The memory
// operand is half the width of the destination (f256mem/f128mem/f64mem).
7480 let Predicates = [HasAVX512] in {
7481 defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64>,
7482 avx512_cvtph2ps_sae<v16f32_info, v16i16x_info>,
7483 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7484 let Predicates = [HasVLX] in {
7485 defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem,
7486 loadv2i64>,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
7487 defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem,
7488 loadv2i64>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
// vcvtps2ph (opcode 0x1D). All forms take the source register in $src1 and an
// 8-bit immediate in $src2:
//   rr  - maskable register-destination form
//   mr  - store form writing to x86memop:$dst
//   mrk - store form with a write-mask operand ($mask)
7492 multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
7493 X86MemOperand x86memop> {
7494 defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
7495 (ins _src.RC:$src1, i32u8imm:$src2),
7496 "vcvtps2ph", "$src2, $src1", "$src1, $src2",
7497 (X86cvtps2ph (_src.VT _src.RC:$src1),
7499 NoItinerary, 0, 0, X86select>, AVX512AIi8Base;
7500 def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
7501 (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
7502 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
7503 [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
7506 let hasSideEffects = 0, mayStore = 1 in
7507 def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
7508 (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
7509 "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
// {sae} register form (rb) of vcvtps2ph. It is defined through
// AVX512_maskable_in_asm with an empty pattern list and hasSideEffects = 0.
7512 multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
7513 let hasSideEffects = 0 in
7514 defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
7515 (outs _dest.RC:$dst),
7516 (ins _src.RC:$src1, i32u8imm:$src2),
7517 "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
7518 []>, EVEX_B, AVX512AIi8Base;
// vcvtps2ph instantiations. The 512-bit definition also gets the {sae} form;
// the 256/128-bit definitions are wrapped in Predicates = [HasVLX]. The memory
// operand is half the width of the source (f256mem/f128mem/f64mem).
7520 let Predicates = [HasAVX512] in {
7521 defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
7522 avx512_cvtps2ph_sae<v16i16x_info, v16f32_info>,
7523 EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>;
7524 let Predicates = [HasVLX] in {
7525 defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem>,
7526 EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>;
7527 defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem>,
7528 EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>;
7532 // Patterns for matching conversions from float to half-float and vice versa.
7533 let Predicates = [HasVLX] in {
7534 // Use MXCSR.RC for rounding instead of explicitly specifying the default
7535 // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
7536 // configurations we support (the default). However, falling back to MXCSR is
7537 // more consistent with other instructions, which are always controlled by it.
7538 // It's encoded as 0b100.
// float -> half: convert in a 128-bit register with immediate 4, then move the
// result to a GPR and take its low 16 bits.
7539 def : Pat<(fp_to_f16 FR32X:$src),
7540 (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
7541 (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
// half -> float: the 16-bit GPR value goes through MOVSX32rr16 before being
// copied into a 128-bit vector register for the conversion.
7543 def : Pat<(f16_to_fp GR16:$src),
7544 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7545 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
// float -> half -> float round trip: feed the vcvtps2ph result straight into
// vcvtph2ps without going through a GPR.
7547 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7548 (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
7549 (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
7552 // Patterns for matching float to half-float conversion when AVX512 is supported
7553 // but F16C isn't. In that case we have to use 512-bit vectors.
// In each pattern the scalar is placed in the low xmm of an undefined 512-bit
// register (INSERT_SUBREG of IMPLICIT_DEF) and the low xmm of the result is
// extracted.
7554 let Predicates = [HasAVX512, NoVLX, NoF16C] in {
7555 def : Pat<(fp_to_f16 FR32X:$src),
7556 (i16 (EXTRACT_SUBREG
7558 (v8i16 (EXTRACT_SUBREG
7560 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
7561 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
7562 sub_xmm), 4), sub_xmm))), sub_16bit))>;
7564 def : Pat<(f16_to_fp GR16:$src),
7565 (f32 (COPY_TO_REGCLASS
7566 (v4f32 (EXTRACT_SUBREG
7568 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
7569 (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
7570 sub_xmm)), sub_xmm)), FR32X))>;
7572 def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
7573 (f32 (COPY_TO_REGCLASS
7574 (v4f32 (EXTRACT_SUBREG
7576 (VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
7577 (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
7578 sub_xmm), 4)), sub_xmm)), FR32X))>;
7581 // Unordered/Ordered scalar fp compare with SAE and set EFLAGS
// Defines rb, a register-register form printed with "{sae}". It has no
// outputs and an empty pattern list.
7582 multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
7584 def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
7585 !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
7586 [], IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
// {sae} forms of the scalar compares: opcode 0x2E for vucomiss/vucomisd and
// 0x2F for vcomiss/vcomisd. All are marked as defining EFLAGS.
7590 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
7591 defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">,
7592 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7593 defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">,
7594 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
7595 defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">,
7596 AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
7597 defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">,
7598 AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
// EVEX-encoded scalar compares built from the shared sse12_ord_cmp /
// sse12_ord_cmp_int multiclasses. All are marked as defining EFLAGS.
7601 let Defs = [EFLAGS], Predicates = [HasAVX512] in {
// Unordered compares on FR32X/FR64X, selected through X86cmp.
7602 defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32,
7603 "ucomiss">, PS, EVEX, VEX_LIG,
7604 EVEX_CD8<32, CD8VT1>;
7605 defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86cmp, f64, f64mem, loadf64,
7606 "ucomisd">, PD, EVEX,
7607 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Ordered compares on FR32X/FR64X are defined with an empty pattern list
// (Pattern = []<dag>, node argument undef).
7608 let Pattern = []<dag> in {
7609 defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32,
7610 "comiss">, PS, EVEX, VEX_LIG,
7611 EVEX_CD8<32, CD8VT1>;
7612 defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64,
7613 "comisd">, PD, EVEX,
7614 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
// Int_ variants operate on VR128X through X86ucomi / X86comi and are marked
// isCodeGenOnly.
7616 let isCodeGenOnly = 1 in {
7617 defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
7618 sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
7619 EVEX_CD8<32, CD8VT1>;
7620 defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
7621 sse_load_f64, "ucomisd">, PD, EVEX,
7622 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7624 defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
7625 sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
7626 EVEX_CD8<32, CD8VT1>;
7627 defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
7628 sse_load_f64, "comisd">, PD, EVEX,
7629 VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
7633 /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
/// Maskable scalar forms under HasAVX512: rr takes both sources in registers;
/// rm loads the second source with _.ScalarLdFrag and wraps it in
/// scalar_to_vector.
7634 multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
7635 X86VectorVTInfo _> {
7636 let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in {
7637 defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7638 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7639 "$src2, $src1", "$src1, $src2",
7640 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, EVEX_4V;
7641 defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7642 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
7643 "$src2, $src1", "$src1, $src2",
7644 (OpNode (_.VT _.RC:$src1),
7645 (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))))>, EVEX_4V;
// Scalar vrcp14 (opcode 0x4D) and vrsqrt14 (opcode 0x4F) for f32 and f64; the
// f64 variants add VEX_W and use a 64-bit EVEX_CD8 element size.
7649 defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>,
7650 EVEX_CD8<32, CD8VT1>, T8PD;
7651 defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>,
7652 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
7653 defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>,
7654 EVEX_CD8<32, CD8VT1>, T8PD;
7655 defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>,
7656 VEX_W, EVEX_CD8<64, CD8VT1>, T8PD;
7658 /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
/// Maskable packed forms: r = register source, m = full-vector load
/// (_.LdFrag), mb = broadcast of a scalar load (X86VBroadcast of
/// _.ScalarLdFrag).
7659 multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
7660 X86VectorVTInfo _> {
7661 let ExeDomain = _.ExeDomain in {
7662 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7663 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7664 (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD;
7665 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7666 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7668 (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD;
7669 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7670 (ins _.ScalarMemOp:$src), OpcodeStr,
7671 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7673 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
// Instantiates avx512_fp14_p for every vector length and both element
// types. The mnemonic is OpcodeStr with "ps" or "pd" appended.
//   PSZ / PDZ       - 512-bit forms (v16f32_info / v8f64_info).
//   PSZ128 / PSZ256 - 128/256-bit single-precision forms, under HasVLX.
//   PDZ128 / PDZ256 - 128/256-bit double-precision forms, under HasVLX.
// All "pd" forms add VEX_W and use 64-bit CD8 full-vector scaling; "ps"
// forms use 32-bit CD8 full-vector scaling.
7678 multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode> {
7679 defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, v16f32_info>,
7680 EVEX_V512, EVEX_CD8<32, CD8VF>;
7681 defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, v8f64_info>,
7682 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
7684 // Define only if AVX512VL feature is present.
7685 let Predicates = [HasVLX] in {
7686 defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7687 OpNode, v4f32x_info>,
7688 EVEX_V128, EVEX_CD8<32, CD8VF>;
7689 defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
7690 OpNode, v8f32x_info>,
7691 EVEX_V256, EVEX_CD8<32, CD8VF>;
7692 defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7693 OpNode, v2f64x_info>,
7694 EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
7695 defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
7696 OpNode, v4f64x_info>,
7697 EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
// Packed rsqrt14 (opcode 0x4E, X86frsqrt) and rcp14 (opcode 0x4C, X86frcp)
// for all vector lengths.
7701 defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>;
7702 defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>;
7704 /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
// Scalar forms whose selection node takes a trailing i32 rounding/exception
// operand:
//   r  - register sources, node called with FROUND_CURRENT.
//   rb - register sources with "{sae}" in the assembly string, node called
//        with FROUND_NO_EXC; tagged EVEX_B.
//   m  - second source is a scalar load widened with scalar_to_vector, node
//        called with FROUND_CURRENT.
7705 multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7707 let ExeDomain = _.ExeDomain in {
7708 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7709 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7710 "$src2, $src1", "$src1, $src2",
7711 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7712 (i32 FROUND_CURRENT))>;
7714 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7715 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7716 "{sae}, $src2, $src1", "$src1, $src2, {sae}",
7717 (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7718 (i32 FROUND_NO_EXC))>, EVEX_B;
7720 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7721 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
7722 "$src2, $src1", "$src1, $src2",
7723 (OpNode (_.VT _.RC:$src1),
7724 (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
7725 (i32 FROUND_CURRENT))>;
// Instantiates avx512_fp28_s for both scalar element types: SS appends "ss"
// to the mnemonic and uses f32x_info; SD appends "sd", uses f64x_info and
// adds VEX_W.
7729 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode> {
7730 defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode>,
7731 EVEX_CD8<32, CD8VT1>;
7732 defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode>,
7733 EVEX_CD8<64, CD8VT1>, VEX_W;
// Scalar rcp28 (opcode 0xCB) and rsqrt28 (opcode 0xCD) are declared under
// the HasERI predicate. Scalar getexp (opcode 0x43, X86fgetexpRnds) reuses
// the same avx512_eri_s multiclass. All are T8PD and EVEX_4V.
7736 let Predicates = [HasERI] in {
7737 defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V;
7738 defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V;
7741 defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V;
7742 /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
// Packed single-source forms whose selection node takes a trailing
// (i32 FROUND_CURRENT) operand:
//   r  - register source.
//   m  - full-vector memory source loaded through _.LdFrag and bitconverted.
//   mb - broadcast of a scalar load (X86VBroadcast); tagged EVEX_B and
//        printed with _.BroadcastStr appended to the operand.
7744 multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7746 let ExeDomain = _.ExeDomain in {
7747 defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7748 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7749 (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>;
7751 defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7752 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7754 (bitconvert (_.LdFrag addr:$src))),
7755 (i32 FROUND_CURRENT))>;
7757 defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7758 (ins _.ScalarMemOp:$src), OpcodeStr,
7759 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7761 (X86VBroadcast (_.ScalarLdFrag addr:$src))),
7762 (i32 FROUND_CURRENT))>, EVEX_B;
// Adds the register-only "rb" form to a packed fp28-style instruction: the
// assembly string carries "{sae}", the selection node is called with
// FROUND_NO_EXC, and the instruction is tagged EVEX_B.
7765 multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
7767 let ExeDomain = _.ExeDomain in
7768 defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7769 (ins _.RC:$src), OpcodeStr,
7770 "{sae}, $src", "$src, {sae}",
7771 (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, EVEX_B;
// 512-bit packed forms only: PS (v16f32_info) and PD (v8f64_info). Each
// combines the r/m/mb forms from avx512_fp28_p with the rb form from
// avx512_fp28_p_round. PD additionally sets VEX_W.
7774 multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode> {
7775 defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
7776 avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode>,
7777 T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
7778 defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
7779 avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode>,
7780 T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// 128-bit and 256-bit packed forms of an avx512_fp28_p-style unary
// operation, for both "ps" and "pd". All four are guarded by HasVLX; no
// rounding ("rb") form is added at these vector lengths.
7783 multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
7785 // Define only if AVX512VL feature is present.
7786 let Predicates = [HasVLX] in {
7787 defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode>,
7788 EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>;
7789 defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode>,
7790 EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
7791 defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode>,
7792 EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
7793 defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode>,
7794 EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
// Packed ERI instructions (512-bit only via avx512_eri), declared under
// HasERI: rsqrt28 (0xCC), rcp28 (0xCA) and exp2 (0xC8).
// Packed getexp (0x42, X86fgetexpRnd) combines the 512-bit avx512_eri forms
// with the 128/256-bit forms from avx512_fp_unaryop_packed.
7797 let Predicates = [HasERI] in {
7799 defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX;
7800 defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX;
7801 defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX;
7803 defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd>,
7804 avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd> , EVEX;
// Packed square-root form with an explicit rounding-control operand: "rb"
// takes a register source plus AVX512RC:$rc, and selects
// OpNodeRnd(src, (i32 imm:$rc)). Tagged EVEX, EVEX_B and EVEX_RC.
7806 multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr,
7807 SDNode OpNodeRnd, X86VectorVTInfo _>{
7808 let ExeDomain = _.ExeDomain in
7809 defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7810 (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc",
7811 (_.VT (OpNodeRnd _.RC:$src, (i32 imm:$rc)))>,
7812 EVEX, EVEX_B, EVEX_RC;
// Packed square-root forms without explicit rounding control:
//   r  - register source.
//   m  - full-vector memory source loaded through _.LdFrag and bitconverted.
//   mb - broadcast of a scalar load (X86VBroadcast), printed with
//        _.BroadcastStr appended to the operand.
7815 multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
7816 SDNode OpNode, X86VectorVTInfo _>{
7817 let ExeDomain = _.ExeDomain in {
7818 defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
7819 (ins _.RC:$src), OpcodeStr, "$src", "$src",
7820 (_.FloatVT (OpNode _.RC:$src))>, EVEX;
7821 defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7822 (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
7824 (bitconvert (_.LdFrag addr:$src))))>, EVEX;
7826 defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
7827 (ins _.ScalarMemOp:$src), OpcodeStr,
7828 "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
7830 (X86VBroadcast (_.ScalarLdFrag addr:$src))))>,
// Instantiates avx512_sqrt_packed for "ps" and "pd" at every vector length.
// PSZ/PDZ are the 512-bit forms; the 128/256-bit forms are guarded by
// HasVLX. "ps" forms use the PS prefix class and 32-bit CD8 scaling; "pd"
// forms use PD, VEX_W and 64-bit CD8 scaling.
7835 multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
7837 defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
7839 EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7840 defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
7842 EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7843 // Define only if AVX512VL feature is present.
7844 let Predicates = [HasVLX] in {
7845 defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7846 OpNode, v4f32x_info>,
7847 EVEX_V128, PS, EVEX_CD8<32, CD8VF>;
7848 defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
7849 OpNode, v8f32x_info>,
7850 EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
7851 defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7852 OpNode, v2f64x_info>,
7853 EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
7854 defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
7855 OpNode, v4f64x_info>,
7856 EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Rounding-control ("rb") square-root forms, 512-bit only: PSZ on
// v16f32_info and PDZ on v8f64_info (the latter with VEX_W).
7860 multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
7862 defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), OpNodeRnd,
7863 v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
7864 defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), OpNodeRnd,
7865 v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
// Scalar square root. Produces two families of definitions:
//   *_Int forms (maskable, on the vector register class _.RC), selected via
//   OpNodeRnd:
//     r_Int  - register sources, FROUND_CURRENT.
//     m_Int  - second source is a scalar load widened with scalar_to_vector,
//              FROUND_CURRENT.
//     rb_Int - register sources plus an AVX512RC:$rc rounding operand.
//   r / m forms (isCodeGenOnly, hasSideEffects = 0, empty pattern lists) on
//   the scalar register class _.FRC.
// The trailing anonymous patterns select NAME#SUFF#Zr / NAME#SUFF#Zm for
// OpNode on a scalar, passing IMPLICIT_DEF as the first source.
// Parameters: SUFF is the string spliced into the instruction name looked up
// by those patterns ("SS"/"SD" at the visible call sites).
7868 multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
7869 string SUFF, SDNode OpNode, SDNode OpNodeRnd> {
7870 let ExeDomain = _.ExeDomain in {
7871 defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7872 (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
7873 "$src2, $src1", "$src1, $src2",
7874 (OpNodeRnd (_.VT _.RC:$src1),
7876 (i32 FROUND_CURRENT))>;
7877 defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7878 (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
7879 "$src2, $src1", "$src1, $src2",
7880 (OpNodeRnd (_.VT _.RC:$src1),
7881 (_.VT (scalar_to_vector
7882 (_.ScalarLdFrag addr:$src2))),
7883 (i32 FROUND_CURRENT))>;
7885 defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7886 (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
7887 "$rc, $src2, $src1", "$src1, $src2, $rc",
7888 (OpNodeRnd (_.VT _.RC:$src1),
// Codegen-only FRC forms: no selection pattern of their own; they are
// reached through the explicit Pat definitions below.
7893 let isCodeGenOnly = 1, hasSideEffects = 0 in {
7894 def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
7895 (ins _.FRC:$src1, _.FRC:$src2),
7896 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
7899 def m : I<opc, MRMSrcMem, (outs _.FRC:$dst),
7900 (ins _.FRC:$src1, _.ScalarMemOp:$src2),
7901 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>;
7905 def : Pat<(_.EltVT (OpNode _.FRC:$src)),
7906 (!cast<Instruction>(NAME#SUFF#Zr)
7907 (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
// The load-folding pattern is only enabled under HasAVX512 and OptForSize.
7909 def : Pat<(_.EltVT (OpNode (load addr:$src))),
7910 (!cast<Instruction>(NAME#SUFF#Zm)
7911 (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512, OptForSize]>;
// Instantiates avx512_sqrt_scalar for single (SSZ, XS prefix class) and
// double (SDZ, XD prefix class plus VEX_W) precision. Both use fsqrt as
// OpNode and X86fsqrtRnds as OpNodeRnd, and are EVEX_4V.
7914 multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> {
7915 defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", f32x_info, "SS", fsqrt,
7916 X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
7917 defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", f64x_info, "SD", fsqrt,
7918 X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
// vsqrt, opcode 0x51: packed forms (fsqrt) plus packed rounding-control
// forms (X86fsqrtRnd), then the scalar forms marked VEX_LIG.
7921 defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>,
7922 avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>;
7924 defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG;
// Scalar f32 X86frsqrt / X86frcp selection under HasAVX512. The register
// forms copy the FR32X input into VR128X and feed it as the second source of
// VRSQRT14SSrr / VRCP14SSrr, with IMPLICIT_DEF as the first source. The
// load-folding (rm) forms are additionally restricted to OptForSize.
7926 let Predicates = [HasAVX512] in {
7927 def : Pat<(f32 (X86frsqrt FR32X:$src)),
7928 (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>;
7929 def : Pat<(f32 (X86frsqrt (load addr:$src))),
7930 (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
7931 Requires<[OptForSize]>;
7932 def : Pat<(f32 (X86frcp FR32X:$src)),
7933 (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>;
7934 def : Pat<(f32 (X86frcp (load addr:$src))),
7935 (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
7936 Requires<[OptForSize]>;
// Scalar round-scale. Instruction forms (all selecting X86RndScales with an
// i32 immediate $src3 and a trailing rounding/exception operand):
//   r  - register sources, FROUND_CURRENT.
//   rb - register sources with "{sae}" in the assembly string,
//        FROUND_NO_EXC; tagged EVEX_B.
//   m  - second source is a scalar load widened with scalar_to_vector,
//        FROUND_CURRENT.
// The HasAVX512 patterns below lower generic scalar rounding nodes on _.FRC
// to the r (register) or m (load) form with a fixed immediate:
//   ffloor -> 0x9, fceil -> 0xa, ftrunc -> 0xb, frint -> 0x4,
//   fnearbyint -> 0xc.
// In each pattern the first source is IMPLICIT_DEF and the result is copied
// back to _.FRC.
7940 avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
7942 let ExeDomain = _.ExeDomain in {
7943 defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7944 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7945 "$src3, $src2, $src1", "$src1, $src2, $src3",
7946 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7947 (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
7949 defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
7950 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
7951 "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
7952 (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
7953 (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B;
7955 defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
7956 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
7958 "$src3, $src2, $src1", "$src1, $src2, $src3",
7959 (_.VT (X86RndScales (_.VT _.RC:$src1),
7960 (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
7961 (i32 imm:$src3), (i32 FROUND_CURRENT)))>;
7963 let Predicates = [HasAVX512] in {
// Register-source lowerings: the scalar input is copied into _.RC first.
7964 def : Pat<(ffloor _.FRC:$src), (COPY_TO_REGCLASS
7965 (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
7966 (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x9))), _.FRC)>;
7967 def : Pat<(fceil _.FRC:$src), (COPY_TO_REGCLASS
7968 (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
7969 (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xa))), _.FRC)>;
7970 def : Pat<(ftrunc _.FRC:$src), (COPY_TO_REGCLASS
7971 (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
7972 (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xb))), _.FRC)>;
7973 def : Pat<(frint _.FRC:$src), (COPY_TO_REGCLASS
7974 (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
7975 (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0x4))), _.FRC)>;
7976 def : Pat<(fnearbyint _.FRC:$src), (COPY_TO_REGCLASS
7977 (_.VT (!cast<Instruction>(NAME##r) (_.VT (IMPLICIT_DEF)),
7978 (_.VT (COPY_TO_REGCLASS _.FRC:$src, _.RC)), (i32 0xc))), _.FRC)>;
// Load-folding lowerings: same immediates, using the m form directly on the
// address.
7980 def : Pat<(ffloor (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
7981 (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
7982 addr:$src, (i32 0x9))), _.FRC)>;
7983 def : Pat<(fceil (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
7984 (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
7985 addr:$src, (i32 0xa))), _.FRC)>;
7986 def : Pat<(ftrunc (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
7987 (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
7988 addr:$src, (i32 0xb))), _.FRC)>;
7989 def : Pat<(frint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
7990 (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
7991 addr:$src, (i32 0x4))), _.FRC)>;
7992 def : Pat<(fnearbyint (_.ScalarLdFrag addr:$src)), (COPY_TO_REGCLASS
7993 (_.VT (!cast<Instruction>(NAME##m) (_.VT (IMPLICIT_DEF)),
7994 addr:$src, (i32 0xc))), _.FRC)>;
// Scalar round-scale instantiations: SS uses opcode 0x0A with f32x_info, SD
// uses opcode 0x0B with f64x_info and VEX_W. Both are AVX512AIi8Base and
// EVEX_4V.
7998 defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>,
7999 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>;
8001 defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W,
8002 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>;
8004 //-------------------------------------------------
8005 // Integer truncate and extend operations
8006 //-------------------------------------------------
// Common definitions for one vector length of a truncating move:
//   rr       - maskable register-to-register form (MRMDestReg) converting
//              SrcInfo.VT to DestInfo.VT with OpNode.
//   Pat defs - map X86select-wrapped truncations onto the rrkz (zeroing) and
//              rrk (merging with $src0) variants of rr.
//   mr / mrk - truncating store and masked truncating store (MRMDestMem),
//              flagged mayStore/mayLoad with no side effects.
// Parameters: SrcInfo/DestInfo describe the wide source and narrow
// destination vectors; x86memop is the memory operand type for the stores.
8008 multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
8009 X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
8010 X86MemOperand x86memop> {
8011 let ExeDomain = DestInfo.ExeDomain in
8012 defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
8013 (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
8014 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
8017 // for intrinsic pattern match
8018 def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
8019 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8021 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
// Select against an all-zeros vector -> zero-masked form.
8024 def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
8025 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8026 DestInfo.ImmAllZerosV)),
8027 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask ,
// Select against a pass-through register -> merge-masked form.
8030 def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask,
8031 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))),
8032 DestInfo.RC:$src0)),
8033 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0,
8034 DestInfo.KRCWM:$mask ,
8037 let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
8038 ExeDomain = DestInfo.ExeDomain in {
8039 def mr : AVX512XS8I<opc, MRMDestMem, (outs),
8040 (ins x86memop:$dst, SrcInfo.RC:$src),
8041 OpcodeStr # "\t{$src, $dst|$dst, $src}",
8044 def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
8045 (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
8046 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8048 }//mayStore = 1, mayLoad = 1, hasSideEffects = 0
// Selection patterns for the truncating stores defined by
// avx512_trunc_common: truncFrag maps to the "mr" instruction and mtruncFrag
// (which also carries a SrcInfo.KRCWM mask) maps to "mrk". The instruction
// is looked up by name as NAME#SrcInfo.ZSuffix#{mr,mrk}.
8051 multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
8052 X86VectorVTInfo DestInfo,
8053 PatFrag truncFrag, PatFrag mtruncFrag > {
8055 def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst),
8056 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr)
8057 addr:$dst, SrcInfo.RC:$src)>;
8059 def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask,
8060 (SrcInfo.VT SrcInfo.RC:$src)),
8061 (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk)
8062 addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
// Instantiates the truncation instructions and their store-lowering patterns
// for all three vector lengths of VTSrcInfo:
//   Z128 / Z256 - require both HasVLX and prd.
//   Z           - requires only prd.
// Parameters:
//   DestInfoZ128/Z256/Z - destination vector info per source length.
//   x86memopZ128/Z256/Z - store memory operand per source length.
//   truncFrag/mtruncFrag - unmasked / masked truncating-store fragments.
//   prd                  - feature predicate, HasAVX512 by default.
8065 multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
8066 AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
8067 X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
8068 X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
8069 X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag,
8070 Predicate prd = HasAVX512>{
8072 let Predicates = [HasVLX, prd] in {
8073 defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
8074 DestInfoZ128, x86memopZ128>,
8075 avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
8076 truncFrag, mtruncFrag>, EVEX_V128;
8078 defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
8079 DestInfoZ256, x86memopZ256>,
8080 avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
8081 truncFrag, mtruncFrag>, EVEX_V256;
8083 let Predicates = [prd] in
8084 defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
8085 DestInfoZ, x86memopZ>,
8086 avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
8087 truncFrag, mtruncFrag>, EVEX_V512;
// i64 -> i8 truncation. The destination is v16i8x_info at every source
// length; stores use i16mem / i32mem / i64mem for 128 / 256 / 512-bit
// sources. CD8 tuple: 8-bit elements, CD8VO.
8090 multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8091 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8092 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
8093 v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
8094 StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
// i64 -> i16 truncation. The destination is v8i16x_info at every source
// length; stores use i32mem / i64mem / i128mem. CD8 tuple: 16-bit elements,
// CD8VQ.
8097 multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8098 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8099 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
8100 v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
8101 StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
// i64 -> i32 truncation. The destination is v4i32x_info for 128/256-bit
// sources and v8i32x_info for the 512-bit source; stores use i64mem /
// i128mem / i256mem. CD8 tuple: 32-bit elements, CD8VH.
8104 multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
8105 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8106 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
8107 v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
8108 StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
// i32 -> i8 truncation. The destination is v16i8x_info at every source
// length; stores use i32mem / i64mem / i128mem. CD8 tuple: 8-bit elements,
// CD8VQ.
8111 multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
8112 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8113 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
8114 v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
8115 StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
// i32 -> i16 truncation. The destination is v8i16x_info for 128/256-bit
// sources and v16i16x_info for the 512-bit source; stores use i64mem /
// i128mem / i256mem. CD8 tuple: 16-bit elements, CD8VH.
8118 multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
8119 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8120 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
8121 v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
8122 StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
// i16 -> i8 truncation. The destination is v16i8x_info for 128/256-bit
// sources and v32i8x_info for the 512-bit source; stores use i64mem /
// i128mem / i256mem. Unlike the other truncation wrappers this one passes
// HasBWI as the feature predicate.
8125 multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
8126 PatFrag StoreNode, PatFrag MaskedStoreNode> {
8127 defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
8128 v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
8129 StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
// Truncating-move instantiations. For each source/destination width pair
// there are three variants, distinguished by selection node and store
// fragments:
//   VPMOVxx   - X86vtrunc,   truncstorevi*,     opcode 0x3N
//   VPMOVSxx  - X86vtruncs,  truncstore_s_vi*,  opcode 0x2N
//   VPMOVUSxx - X86vtruncus, truncstore_us_vi*, opcode 0x1N
// where N is 2 (QB), 4 (QW), 5 (QD), 1 (DB), 3 (DW) or 0 (WB).
8132 defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
8133 truncstorevi8, masked_truncstorevi8>;
8134 defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
8135 truncstore_s_vi8, masked_truncstore_s_vi8>;
8136 defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
8137 truncstore_us_vi8, masked_truncstore_us_vi8>;
8139 defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
8140 truncstorevi16, masked_truncstorevi16>;
8141 defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
8142 truncstore_s_vi16, masked_truncstore_s_vi16>;
8143 defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
8144 truncstore_us_vi16, masked_truncstore_us_vi16>;
8146 defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
8147 truncstorevi32, masked_truncstorevi32>;
8148 defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
8149 truncstore_s_vi32, masked_truncstore_s_vi32>;
8150 defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
8151 truncstore_us_vi32, masked_truncstore_us_vi32>;
8153 defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
8154 truncstorevi8, masked_truncstorevi8>;
8155 defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
8156 truncstore_s_vi8, masked_truncstore_s_vi8>;
8157 defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
8158 truncstore_us_vi8, masked_truncstore_us_vi8>;
8160 defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
8161 truncstorevi16, masked_truncstorevi16>;
8162 defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
8163 truncstore_s_vi16, masked_truncstore_s_vi16>;
8164 defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
8165 truncstore_us_vi16, masked_truncstore_us_vi16>;
8167 defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
8168 truncstorevi8, masked_truncstorevi8>;
8169 defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
8170 truncstore_s_vi8, masked_truncstore_s_vi8>;
8171 defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
8172 truncstore_us_vi8, masked_truncstore_us_vi8>;
// Without VLX, a 256-bit truncation is performed with the 512-bit
// instruction: the YMM source is inserted into an undefined ZMM
// (INSERT_SUBREG on sub_ymm), truncated with VPMOVDWZrr / VPMOVQDZrr, and
// the low XMM of the result is extracted (EXTRACT_SUBREG on sub_xmm).
8174 let Predicates = [HasAVX512, NoVLX] in {
8175 def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
8176 (v8i16 (EXTRACT_SUBREG
8177 (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
8178 VR256X:$src, sub_ymm)))), sub_xmm))>;
8179 def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
8180 (v4i32 (EXTRACT_SUBREG
8181 (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
8182 VR256X:$src, sub_ymm)))), sub_xmm))>;
// Same widening trick for v16i16 -> v16i8 when BWI is available but VLX is
// not: widen to v32i16, use VPMOVWBZrr, and take the low XMM of the result.
8185 let Predicates = [HasBWI, NoVLX] in {
8186 def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
8187 (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
8188 VR256X:$src, sub_ymm))), sub_xmm))>;
// Common definitions for one vector length of an extending move:
//   rr - register source; pattern is OpNode applied to SrcInfo.VT.
//   rm - memory source (x86memop); pattern is LdFrag on the address,
//        producing DestInfo.VT directly.
// Parameters: DestInfo/SrcInfo describe the wide destination and narrow
// source vectors; LdFrag is the extending-load fragment used by rm.
8191 multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
8192 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
8193 X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
8194 let ExeDomain = DestInfo.ExeDomain in {
8195 defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8196 (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
8197 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
8200 defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8201 (ins x86memop:$src), OpcodeStr ,"$src", "$src",
8202 (DestInfo.VT (LdFrag addr:$src))>,
// i8 -> i16 extension. Z128 and Z256 require HasVLX and HasBWI; Z requires
// HasBWI. Z128 selects on InVecNode, while Z256 and Z select on OpNode.
// Memory operands are i64mem / i128mem / i256mem. LdFrag defaults to the
// fragment named ExtTy#"extloadvi8".
8207 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
8208 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8209 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8210 let Predicates = [HasVLX, HasBWI] in {
8211 defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
8212 v16i8x_info, i64mem, LdFrag, InVecNode>,
8213 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
8215 defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
8216 v16i8x_info, i128mem, LdFrag, OpNode>,
8217 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
8219 let Predicates = [HasBWI] in {
8220 defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
8221 v32i8x_info, i256mem, LdFrag, OpNode>,
8222 EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
// i8 -> i32 extension. Z128 and Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Z128 selects on InVecNode, the others on OpNode. The
// source is always v16i8x_info; memory operands are i32mem / i64mem /
// i128mem. LdFrag defaults to ExtTy#"extloadvi8".
8226 multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr,
8227 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8228 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8229 let Predicates = [HasVLX, HasAVX512] in {
8230 defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
8231 v16i8x_info, i32mem, LdFrag, InVecNode>,
8232 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
8234 defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
8235 v16i8x_info, i64mem, LdFrag, OpNode>,
8236 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
8238 let Predicates = [HasAVX512] in {
8239 defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
8240 v16i8x_info, i128mem, LdFrag, OpNode>,
8241 EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
// i8 -> i64 extension. Z128 and Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Z128 selects on InVecNode, the others on OpNode. The
// source is always v16i8x_info; memory operands are i16mem / i32mem /
// i64mem. LdFrag defaults to ExtTy#"extloadvi8".
8245 multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr,
8246 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8247 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
8248 let Predicates = [HasVLX, HasAVX512] in {
8249 defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
8250 v16i8x_info, i16mem, LdFrag, InVecNode>,
8251 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
8253 defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
8254 v16i8x_info, i32mem, LdFrag, OpNode>,
8255 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
8257 let Predicates = [HasAVX512] in {
8258 defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
8259 v16i8x_info, i64mem, LdFrag, OpNode>,
8260 EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
// i16 -> i32 extension. Z128 and Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Z128 selects on InVecNode, the others on OpNode.
// Memory operands are i64mem / i128mem / i256mem. LdFrag defaults to
// ExtTy#"extloadvi16".
8264 multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr,
8265 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8266 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8267 let Predicates = [HasVLX, HasAVX512] in {
8268 defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
8269 v8i16x_info, i64mem, LdFrag, InVecNode>,
8270 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
8272 defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
8273 v8i16x_info, i128mem, LdFrag, OpNode>,
8274 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
8276 let Predicates = [HasAVX512] in {
8277 defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
8278 v16i16x_info, i256mem, LdFrag, OpNode>,
8279 EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
// i16 -> i64 extension. Z128 and Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Z128 selects on InVecNode, the others on OpNode. The
// source is always v8i16x_info; memory operands are i32mem / i64mem /
// i128mem. LdFrag defaults to ExtTy#"extloadvi16".
8283 multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr,
8284 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8285 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
8286 let Predicates = [HasVLX, HasAVX512] in {
8287 defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
8288 v8i16x_info, i32mem, LdFrag, InVecNode>,
8289 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
8291 defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
8292 v8i16x_info, i64mem, LdFrag, OpNode>,
8293 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
8295 let Predicates = [HasAVX512] in {
8296 defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
8297 v8i16x_info, i128mem, LdFrag, OpNode>,
8298 EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
// i32 -> i64 extension. Z128 and Z256 require HasVLX and HasAVX512; Z
// requires HasAVX512. Z128 selects on InVecNode, the others on OpNode.
// Memory operands are i64mem / i128mem / i256mem. LdFrag defaults to
// ExtTy#"extloadvi32".
8302 multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr,
8303 SDPatternOperator OpNode, SDPatternOperator InVecNode,
8304 string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
8306 let Predicates = [HasVLX, HasAVX512] in {
8307 defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
8308 v4i32x_info, i64mem, LdFrag, InVecNode>,
8309 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
8311 defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
8312 v4i32x_info, i128mem, LdFrag, OpNode>,
8313 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
8315 let Predicates = [HasAVX512] in {
8316 defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
8317 v8i32x_info, i256mem, LdFrag, OpNode>,
8318 EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
// Zero-extending moves (opcodes 0x30-0x35): X86vzext / zext_invec, with
// ExtTy "z" so the default load fragment is zextloadvi*.
8322 defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">;
8323 defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">;
8324 defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">;
8325 defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">;
8326 defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">;
8327 defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">;
// Sign-extending moves (opcodes 0x20-0x25): X86vsext / sext_invec, with
// ExtTy "s" so the default load fragment is sextloadvi*.
8329 defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">;
8330 defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">;
8331 defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">;
8332 defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">;
8333 defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">;
8334 defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">;
8336 // EXTLOAD patterns, implemented using vpmovz
// Maps an extending load (LdFrag) producing To.VT onto the VPMOVZX
// instruction named "VPMOVZX"#InstrStr#{rm,rmk,rmkz}:
//   rm   - plain load.
//   rmk  - vselect between the load and a pass-through register $src0.
//   rmkz - vselect form selecting the zero-masked instruction.
// NOTE(review): the source patterns constrain the mask as To.KRCWM while the
// output operands name To.KRC - confirm this mismatch is intended.
8337 multiclass avx512_ext_lowering<string InstrStr, X86VectorVTInfo To,
8338 X86VectorVTInfo From, PatFrag LdFrag> {
8339 def : Pat<(To.VT (LdFrag addr:$src)),
8340 (!cast<Instruction>("VPMOVZX"#InstrStr#"rm") addr:$src)>;
8341 def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)),
8342 (!cast<Instruction>("VPMOVZX"#InstrStr#"rmk") To.RC:$src0,
8343 To.KRC:$mask, addr:$src)>;
8344 def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src),
8346 (!cast<Instruction>("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask,
// Instantiations of avx512_ext_lowering for the any-extending load fragments
// extloadvi8/16/32. Predicates mirror those on the corresponding VPMOVZX
// definitions: byte->word forms need BWI (plus VLX for 128/256-bit), and the
// remaining 128/256-bit forms need VLX and AVX512.
8350 let Predicates = [HasVLX, HasBWI] in {
8351 defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>;
8352 defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>;
8354 let Predicates = [HasBWI] in {
8355 defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>;
8357 let Predicates = [HasVLX, HasAVX512] in {
8358 defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>;
8359 defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>;
8360 defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>;
8361 defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>;
8362 defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>;
8363 defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>;
8364 defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>;
8365 defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>;
8366 defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, extloadvi32>;
8367 defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>;
8369 let Predicates = [HasAVX512] in {
8370 defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>;
8371 defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>;
8372 defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>;
8373 defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>;
8374 defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
// Load-folding selection patterns for one family of vector extend
// instructions.
//
// Each Pat matches an extend whose source vector comes straight from memory
// (a full vector load, a scalar load placed in a vector via scalar_to_vector,
// or a vzmovl/vzload fragment) and selects the memory-operand ("rm") form of
// the instruction, looked up by name with !cast<I>(OpcPrefix # <variant>).
//
//   OpcPrefix - instruction name prefix the BW/BD/BQ/WD/WQ/DQ variants are
//               appended to.
//   ExtOp     - node matched for the 256-bit and 512-bit results.
//   InVecOp   - node matched for the 128-bit results.
//   ExtLoad16 - load fragment used only by the scalar_to_vector form of
//               BQZ128rm.
8377 multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp,
8378 SDNode InVecOp, PatFrag ExtLoad16> {
// 128-bit results (InVecOp). The byte->word forms are gated on both VLX and
// BWI.
8380 let Predicates = [HasVLX, HasBWI] in {
8381 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8382 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8383 def : Pat<(v8i16 (InVecOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8384 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8385 def : Pat<(v8i16 (InVecOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8386 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8387 def : Pat<(v8i16 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8388 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
8389 def : Pat<(v8i16 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8390 (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
// Remaining 128-bit results (BD, BQ, WD, WQ, DQ) are gated on VLX only.
8392 let Predicates = [HasVLX] in {
8393 def : Pat<(v4i32 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8394 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8395 def : Pat<(v4i32 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8396 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8397 def : Pat<(v4i32 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8398 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8399 def : Pat<(v4i32 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8400 (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
8402 def : Pat<(v2i64 (InVecOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
8403 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8404 def : Pat<(v2i64 (InVecOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8405 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8406 def : Pat<(v2i64 (InVecOp (v16i8 (vzload_v2i64 addr:$src)))),
8407 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8408 def : Pat<(v2i64 (InVecOp (bc_v16i8 (loadv2i64 addr:$src)))),
8409 (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
8411 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8412 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8413 def : Pat<(v4i32 (InVecOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8414 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8415 def : Pat<(v4i32 (InVecOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8416 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8417 def : Pat<(v4i32 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8418 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8419 def : Pat<(v4i32 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8420 (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
8422 def : Pat<(v2i64 (InVecOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8423 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8424 def : Pat<(v2i64 (InVecOp (v8i16 (vzmovl_v4i32 addr:$src)))),
8425 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8426 def : Pat<(v2i64 (InVecOp (v8i16 (vzload_v2i64 addr:$src)))),
8427 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8428 def : Pat<(v2i64 (InVecOp (bc_v8i16 (loadv2i64 addr:$src)))),
8429 (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
8431 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8432 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8433 def : Pat<(v2i64 (InVecOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
8434 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8435 def : Pat<(v2i64 (InVecOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8436 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8437 def : Pat<(v2i64 (InVecOp (v4i32 (vzload_v2i64 addr:$src)))),
8438 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
8439 def : Pat<(v2i64 (InVecOp (bc_v4i32 (loadv2i64 addr:$src)))),
8440 (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
// 256-bit results (ExtOp). Again the byte->word forms need VLX and BWI.
8443 let Predicates = [HasVLX, HasBWI] in {
8444 def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8445 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8446 def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8447 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
8448 def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8449 (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
// Remaining 256-bit results are gated on VLX only.
8451 let Predicates = [HasVLX] in {
8452 def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8453 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8454 def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
8455 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8456 def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8457 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8458 def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8459 (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
8461 def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
8462 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8463 def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
8464 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8465 def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
8466 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8467 def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8468 (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
8470 def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8471 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8472 def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8473 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8474 def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8475 (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
8477 def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8478 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8479 def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
8480 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8481 def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
8482 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8483 def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8484 (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
8486 def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
8487 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8488 def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
8489 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
8490 def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
8491 (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
// 512-bit results (ExtOp): only full vector loads are matched, except for BQ
// which also accepts a scalar i64 load. Byte->word needs BWI; the rest need
// only AVX512.
8494 let Predicates = [HasBWI] in {
8495 def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
8496 (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
8498 let Predicates = [HasAVX512] in {
8499 def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8500 (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
8502 def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
8503 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8504 def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
8505 (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
8507 def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
8508 (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
8510 def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
8511 (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
8513 def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
8514 (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
// Instantiate the load-folding patterns twice: for the VPMOVSX opcodes
// (X86vsext / sext_invec) and for the VPMOVZX opcodes (X86vzext / zext_invec).
// The last argument is the 16-bit load fragment used by the BQZ128rm pattern.
8518 defm : AVX512_pmovx_patterns<"VPMOVSX", X86vsext, sext_invec, extloadi32i16>;
8519 defm : AVX512_pmovx_patterns<"VPMOVZX", X86vzext, zext_invec, loadi16_anyext>;
8521 //===----------------------------------------------------------------------===//
8522 // GATHER - SCATTER Operations
// Defines the memory-source ("rm") form of one gather instruction.
//
//   opc        - opcode byte.
//   OpcodeStr  - mnemonic; _.Suffix is appended to it.
//   _          - vector type info supplying RC, KRCWM, VT, EltSize, ExeDomain.
//   memop      - memory operand class used for $src2.
//   GatherNode - fragment matched by the selection pattern.
//
// The instruction has two results: the destination vector and a written-back
// mask. Per the Constraints string, $src1 is tied to $dst, $mask is tied to
// $mask_wb, and $dst is marked early-clobber.
8524 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8525 X86MemOperand memop, PatFrag GatherNode> {
8526 let Constraints = "@earlyclobber $dst, $src1 = $dst, $mask = $mask_wb",
8527 ExeDomain = _.ExeDomain in
8528 def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst, _.KRCWM:$mask_wb),
8529 (ins _.RC:$src1, _.KRCWM:$mask, memop:$src2),
8530 !strconcat(OpcodeStr#_.Suffix,
8531 "\t{$src2, ${dst} {${mask}}|${dst} {${mask}}, $src2}"),
8532 [(set _.RC:$dst, _.KRCWM:$mask_wb,
8533 (GatherNode (_.VT _.RC:$src1), _.KRCWM:$mask,
8534 vectoraddr:$src2))]>, EVEX, EVEX_K,
8535 EVEX_CD8<_.EltSize, CD8VT1>;
// Instantiates avx512_gather for the "d" (opcode dopc) and "q" (opcode qopc)
// mnemonic variants of a gather. Every variant here is tagged VEX_W.
// The 512-bit forms are unconditional within this multiclass; the 256-bit and
// 128-bit forms are gated on HasVLX. Record names are built as
// NAME # {D,Q} # SUFF # {Z,Z256,Z128}.
8538 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
8539 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8540 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512,
8541 vy512mem, mgatherv8i32>, EVEX_V512, VEX_W;
8542 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info512,
8543 vz512mem, mgatherv8i64>, EVEX_V512, VEX_W;
8544 let Predicates = [HasVLX] in {
8545 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8546 vx256xmem, mgatherv4i32>, EVEX_V256, VEX_W;
8547 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info256,
8548 vy256xmem, mgatherv4i64>, EVEX_V256, VEX_W;
8549 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8550 vx128xmem, mgatherv4i32>, EVEX_V128, VEX_W;
8551 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8552 vx128xmem, mgatherv2i64>, EVEX_V128, VEX_W;
// Counterpart of avx512_gather_q_pd without VEX_W. The "d" variants use the
// type info matching the encoded vector length, while the "q" variants use a
// narrower one: info256 for the Z form and info128 for both Z256 and Z128.
// NOTE(review): the Z128 "q" variant is the only one using the X86-prefixed
// fragment X86mgatherv2i64 rather than an mgather* fragment; confirm this is
// intentional before changing it.
8556 multiclass avx512_gather_d_ps<bits<8> dopc, bits<8> qopc,
8557 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8558 defm NAME##D##SUFF##Z: avx512_gather<dopc, OpcodeStr##"d", _.info512, vz512mem,
8559 mgatherv16i32>, EVEX_V512;
8560 defm NAME##Q##SUFF##Z: avx512_gather<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8561 mgatherv8i64>, EVEX_V512;
8562 let Predicates = [HasVLX] in {
8563 defm NAME##D##SUFF##Z256: avx512_gather<dopc, OpcodeStr##"d", _.info256,
8564 vy256xmem, mgatherv8i32>, EVEX_V256;
8565 defm NAME##Q##SUFF##Z256: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8566 vy128xmem, mgatherv4i64>, EVEX_V256;
8567 defm NAME##D##SUFF##Z128: avx512_gather<dopc, OpcodeStr##"d", _.info128,
8568 vx128xmem, mgatherv4i32>, EVEX_V128;
8569 defm NAME##Q##SUFF##Z128: avx512_gather<qopc, OpcodeStr##"q", _.info128,
8570 vx64xmem, X86mgatherv2i64>, EVEX_V128;
// Gather instantiations. VGATHER uses opcodes 0x92/0x93 with the f64 (PD) and
// f32 (PS) type infos; VPGATHER uses opcodes 0x90/0x91 with the i64 (Q) and
// i32 (D) type infos. In each pair the first opcode is the "d" variant and the
// second the "q" variant.
8575 defm VGATHER : avx512_gather_q_pd<0x92, 0x93, avx512vl_f64_info, "vgather", "PD">,
8576 avx512_gather_d_ps<0x92, 0x93, avx512vl_f32_info, "vgather", "PS">;
8578 defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q">,
8579 avx512_gather_d_ps<0x90, 0x91, avx512vl_i32_info, "vpgather", "D">;
// Defines the memory-destination ("mr") form of one scatter instruction.
//
//   opc         - opcode byte.
//   OpcodeStr   - mnemonic; _.Suffix is appended to it.
//   _           - vector type info supplying RC, KRCWM, VT, EltSize, ExeDomain.
//   memop       - memory operand class used for $dst.
//   ScatterNode - fragment matched by the selection pattern.
//
// The only register result is the written-back mask, which is tied to the
// input $mask. The instruction is marked mayStore.
8581 multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
8582 X86MemOperand memop, PatFrag ScatterNode> {
8584 let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain in
8586 def mr : AVX5128I<opc, MRMDestMem, (outs _.KRCWM:$mask_wb),
8587 (ins memop:$dst, _.KRCWM:$mask, _.RC:$src),
8588 !strconcat(OpcodeStr#_.Suffix,
8589 "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
8590 [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
8591 _.KRCWM:$mask, vectoraddr:$dst))]>,
8592 EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
// Instantiates avx512_scatter for the "d" (opcode dopc) and "q" (opcode qopc)
// mnemonic variants of a scatter. Every variant here is tagged VEX_W.
// The 256-bit and 128-bit forms are gated on HasVLX. Record names are built
// as NAME # {D,Q} # SUFF # {Z,Z256,Z128}.
8595 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
8596 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8597 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512,
8598 vy512mem, mscatterv8i32>, EVEX_V512, VEX_W;
8599 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info512,
8600 vz512mem, mscatterv8i64>, EVEX_V512, VEX_W;
8601 let Predicates = [HasVLX] in {
8602 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8603 vx256xmem, mscatterv4i32>, EVEX_V256, VEX_W;
8604 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info256,
8605 vy256xmem, mscatterv4i64>, EVEX_V256, VEX_W;
8606 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8607 vx128xmem, mscatterv4i32>, EVEX_V128, VEX_W;
8608 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8609 vx128xmem, mscatterv2i64>, EVEX_V128, VEX_W;
// Counterpart of avx512_scatter_q_pd without VEX_W. The "d" variants use the
// type info matching the encoded vector length, while the "q" variants use a
// narrower one: info256 for the Z form and info128 for both Z256 and Z128.
8613 multiclass avx512_scatter_d_ps<bits<8> dopc, bits<8> qopc,
8614 AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
8615 defm NAME##D##SUFF##Z: avx512_scatter<dopc, OpcodeStr##"d", _.info512, vz512mem,
8616 mscatterv16i32>, EVEX_V512;
8617 defm NAME##Q##SUFF##Z: avx512_scatter<qopc, OpcodeStr##"q", _.info256, vz256xmem,
8618 mscatterv8i64>, EVEX_V512;
8619 let Predicates = [HasVLX] in {
8620 defm NAME##D##SUFF##Z256: avx512_scatter<dopc, OpcodeStr##"d", _.info256,
8621 vy256xmem, mscatterv8i32>, EVEX_V256;
8622 defm NAME##Q##SUFF##Z256: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8623 vy128xmem, mscatterv4i64>, EVEX_V256;
8624 defm NAME##D##SUFF##Z128: avx512_scatter<dopc, OpcodeStr##"d", _.info128,
8625 vx128xmem, mscatterv4i32>, EVEX_V128;
8626 defm NAME##Q##SUFF##Z128: avx512_scatter<qopc, OpcodeStr##"q", _.info128,
8627 vx64xmem, mscatterv2i64>, EVEX_V128;
// Scatter instantiations. VSCATTER uses opcodes 0xA2/0xA3 with the f64 (PD)
// and f32 (PS) type infos; VPSCATTER uses opcodes 0xA0/0xA1 with the i64 (Q)
// and i32 (D) type infos. In each pair the first opcode is the "d" variant and
// the second the "q" variant.
8631 defm VSCATTER : avx512_scatter_q_pd<0xA2, 0xA3, avx512vl_f64_info, "vscatter", "PD">,
8632 avx512_scatter_d_ps<0xA2, 0xA3, avx512vl_f32_info, "vscatter", "PS">;
8634 defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter", "Q">,
8635 avx512_scatter_d_ps<0xA0, 0xA1, avx512vl_i32_info, "vpscatter", "D">;
// Defines the "m" form of a gather/scatter prefetch instruction.
//
//   opc       - opcode byte.
//   F         - instruction format (the instantiations pass MRM1m/2m/5m/6m).
//   OpcodeStr - mnemonic.
//   KRC       - register class of the $mask operand.
//   memop     - memory operand class of $src.
//
// The instruction has no outputs, requires HasPFI and is marked as having
// side effects.
8638 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
8639 RegisterClass KRC, X86MemOperand memop> {
8640 let Predicates = [HasPFI], hasSideEffects = 1 in
8641 def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
8642 !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
// Prefetch instantiations. Across all four groups the D-index forms use
// opcode 0xC6 and the Q-index forms use 0xC7; the PD forms additionally carry
// VEX_W. The groups differ only in the format argument:
//   MRM1m - vgatherpf0*,   MRM2m - vgatherpf1*,
//   MRM5m - vscatterpf0*,  MRM6m - vscatterpf1*.

// vgatherpf0* (MRM1m)
8646 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
8647 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8649 defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
8650 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8652 defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
8653 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8655 defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
8656 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vgatherpf1* (MRM2m)
8658 defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
8659 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8661 defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
8662 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8664 defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
8665 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8667 defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
8668 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vscatterpf0* (MRM5m)
8670 defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
8671 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8673 defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
8674 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8676 defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
8677 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8679 defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
8680 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// vscatterpf1* (MRM6m)
8682 defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
8683 VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
8685 defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
8686 VK8WM, vz256xmem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
8688 defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
8689 VK8WM, vy512mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
8691 defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
8692 VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
8694 // Helper fragments to match sext vXi1 to vXiY.
// The 16-, 32- and 64-bit element variants match an X86vsrai of a 512-bit
// register by 15, 31 and 63 respectively, i.e. by the element width minus one.
// The 8-bit element variant is built differently (it involves an all-zeros
// vector rather than a shift).
8695 def v64i1sextv64i8 : PatLeaf<(v64i8
8698 (bc_v64i8 (v16i32 immAllZerosV)),
8700 def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>;
8701 def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
8702 def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
// Defines the register form ("rr") of a mask-to-vector conversion: the source
// is a mask register (Vec.KRC) and the result is a vector register (Vec.RC),
// selected for X86vsext of the mask. The mnemonic is OpcodeStr with
// Vec.Suffix appended.
8704 multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
8705 def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
8706 !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
8707 [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
8710 // Use the 512-bit version to implement the 128/256-bit forms when VLX is
// not available (NoVLX).
//   X86Info - type info of the narrow result being produced.
//   _       - type info of the wide instruction actually emitted.
// The narrow mask is copied into the wide mask register class, the wide
// NAME#"Zrr" instruction is run, and the narrow result is taken with
// EXTRACT_SUBREG using X86Info.SubRegIdx.
8711 multiclass avx512_convert_mask_to_vector_lowering<X86VectorVTInfo X86Info,
8712 X86VectorVTInfo _> {
8714 def : Pat<(X86Info.VT (X86vsext (X86Info.KVT X86Info.KRC:$src))),
8715 (X86Info.VT (EXTRACT_SUBREG
8716 (_.VT (!cast<Instruction>(NAME#"Zrr")
8717 (_.KVT (COPY_TO_REGCLASS X86Info.KRC:$src,_.KRC)))),
8718 X86Info.SubRegIdx))>;
// Instantiates the mask-to-vector conversion for one element width.
//   prd alone      -> the 512-bit instruction (Z).
//   prd + HasVLX   -> native 256-bit and 128-bit instructions (Z256, Z128).
//   prd + NoVLX    -> patterns that implement the 256/128-bit cases through
//                     the 512-bit instruction (Z256_Alt, Z128_Alt).
8721 multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
8722 string OpcodeStr, Predicate prd> {
8723 let Predicates = [prd] in
8724 defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
8726 let Predicates = [prd, HasVLX] in {
8727 defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
8728 defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
8730 let Predicates = [prd, NoVLX] in {
8731 defm Z256_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info256,VTInfo.info512>;
8732 defm Z128_Alt : avx512_convert_mask_to_vector_lowering<VTInfo.info128,VTInfo.info512>;
// Mask-to-vector conversions. The byte and word forms use opcode 0x28 and
// require HasBWI; the dword and qword forms use opcode 0x38 and require
// HasDQI. Within each pair the wider element form carries VEX_W. All share
// the "vpmovm2" mnemonic stem; the type info's Suffix completes it.
8737 defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
8738 defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
8739 defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
8740 defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
// Defines the register form ("rr") of a vector-to-mask conversion: the source
// is a vector register (_.RC) and the result is a mask register (_.KRC),
// selected for X86cvt2mask. OpcodeStr is used as the mnemonic unchanged.
8742 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
8743 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
8744 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
8745 [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX;
8748 // Use the 512-bit version to implement the 128/256-bit forms when VLX is
// not available (NoVLX).
//   ExtendInfo - type info of the wide instruction actually emitted.
//   _          - type info of the narrow source vector.
// The narrow source is inserted into an IMPLICIT_DEF wide vector at
// _.SubRegIdx, the wide NAME#"Zrr" instruction is run, and its result is
// copied to the register class of the narrow mask type.
8749 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
8750 X86VectorVTInfo _> {
8752 def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
8753 (_.KVT (COPY_TO_REGCLASS
8754 (!cast<Instruction>(NAME#"Zrr")
8755 (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
8756 _.RC:$src, _.SubRegIdx)),
// Instantiates the vector-to-mask conversion for one element width.
//   prd alone      -> the 512-bit instruction (Z).
//   prd + HasVLX   -> native 256-bit and 128-bit instructions (Z256, Z128).
//   prd + NoVLX    -> patterns that implement the 256/128-bit cases through
//                     the 512-bit instruction (Z256_Alt, Z128_Alt).
8760 multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
8761 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
8762 let Predicates = [prd] in
8763 defm Z : convert_vector_to_mask_common <opc, VTInfo.info512, OpcodeStr>,
8766 let Predicates = [prd, HasVLX] in {
8767 defm Z256 : convert_vector_to_mask_common<opc, VTInfo.info256, OpcodeStr>,
8769 defm Z128 : convert_vector_to_mask_common<opc, VTInfo.info128, OpcodeStr>,
8772 let Predicates = [prd, NoVLX] in {
8773 defm Z256_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info256>;
8774 defm Z128_Alt : convert_vector_to_mask_lowering<VTInfo.info512, VTInfo.info128>;
// Vector-to-mask conversions. The byte and word forms use opcode 0x29 and
// require HasBWI; the dword and qword forms use opcode 0x39 and require
// HasDQI. Within each pair the wider element form carries VEX_W.
8778 defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
8779 avx512vl_i8_info, HasBWI>;
8780 defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
8781 avx512vl_i16_info, HasBWI>, VEX_W;
8782 defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
8783 avx512vl_i32_info, HasDQI>;
8784 defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
8785 avx512vl_i64_info, HasDQI>, VEX_W;
8787 //===----------------------------------------------------------------------===//
8788 // AVX-512 - COMPRESS and EXPAND
// Defines the compress instruction forms for one vector width:
//   rr  - register form, generated through AVX512_maskable and selected for
//         X86compress of $src1.
//   mr  - store form to _.MemOp:$dst; marked mayStore with no other side
//         effects and given an empty selection pattern.
//   mrk - store form that additionally takes a write mask (_.KRCWM:$mask).
8791 multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
8793 defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
8794 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8795 (_.VT (X86compress _.RC:$src1))>, AVX5128IBase;
8797 let mayStore = 1, hasSideEffects = 0 in
8798 def mr : AVX5128I<opc, MRMDestMem, (outs),
8799 (ins _.MemOp:$dst, _.RC:$src),
8800 OpcodeStr # "\t{$src, $dst|$dst, $src}",
8801 []>, EVEX_CD8<_.EltSize, CD8VT1>;
8803 def mrk : AVX5128I<opc, MRMDestMem, (outs),
8804 (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
8805 OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
8807 EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
// Selects an X86mCompressingStore node to the masked store form of the
// compress instruction, looked up by name as NAME # _.ZSuffix # "mrk", passing
// the address, mask and source register through unchanged.
8810 multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
8812 def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
8814 (!cast<Instruction>(NAME#_.ZSuffix##mrk)
8815 addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
// Instantiates the compress instructions and their compressing-store lowering
// for one element type: the 512-bit form (Z) unconditionally within this
// multiclass, and the 256-bit and 128-bit forms (Z256, Z128) under HasVLX.
8818 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
8819 AVX512VLVectorVTInfo VTInfo> {
8820 defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
8821 compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8823 let Predicates = [HasVLX] in {
8824 defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
8825 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8826 defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
8827 compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
// Compress instantiations: opcode 0x8B with the i32/i64 type infos
// (vpcompressd/vpcompressq) and opcode 0x8A with the f32/f64 type infos
// (vcompressps/vcompresspd).
8831 defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>,
8833 defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>,
8835 defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>,
8837 defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>,
// Defines the expand instruction forms for one vector width, both generated
// through AVX512_maskable and selected for X86expand:
//   rr - source is a vector register.
//   rm - source is a full-vector memory operand, loaded with _.LdFrag and
//        bitconverted to _.VT.
8841 multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
8843 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8844 (ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
8845 (_.VT (X86expand _.RC:$src1))>, AVX5128IBase;
8847 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8848 (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1",
8849 (_.VT (X86expand (_.VT (bitconvert
8850 (_.LdFrag addr:$src1)))))>,
8851 AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
// Selects X86mExpandingLoad nodes to the masked memory forms of the expand
// instruction, looked up by name from NAME # _.ZSuffix:
//   - pass-through value is undef        -> "rmkz" form (mask, address).
//   - pass-through value is a register   -> "rmk" form (src0, mask, address).
8854 multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
8856 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
8857 (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
8858 _.KRCWM:$mask, addr:$src)>;
8860 def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
8861 (_.VT _.RC:$src0))),
8862 (!cast<Instruction>(NAME#_.ZSuffix##rmk)
8863 _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
// Instantiates the expand instructions and their expanding-load lowering for
// one element type: the 512-bit form (Z) unconditionally within this
// multiclass, and the 256-bit and 128-bit forms (Z256, Z128) under HasVLX.
8866 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
8867 AVX512VLVectorVTInfo VTInfo> {
8868 defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
8869 expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
8871 let Predicates = [HasVLX] in {
8872 defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
8873 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
8874 defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
8875 expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
// Expand instantiations: opcode 0x89 with the i32/i64 type infos
// (vpexpandd/vpexpandq) and opcode 0x88 with the f32/f64 type infos
// (vexpandps/vexpandpd).
8879 defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", avx512vl_i32_info>,
8881 defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", avx512vl_i64_info>,
8883 defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>,
8885 defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
8888 // Handles instructions of the form reg_vec1 = op(reg_vec,imm)
8890 // op(broadcast(eltVt),imm)
8891 // All instructions are created with FROUND_CURRENT.
// Three maskable forms are defined, each passing (i32 FROUND_CURRENT) as the
// final OpNode operand:
//   rri  - vector register source.
//   rmi  - full-vector memory source (loaded with _.LdFrag).
//   rmbi - scalar memory source broadcast with X86VBroadcast; tagged EVEX_B
//          and printed with _.BroadcastStr.
8892 multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8894 let ExeDomain = _.ExeDomain in {
8895 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8896 (ins _.RC:$src1, i32u8imm:$src2),
8897 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8898 (OpNode (_.VT _.RC:$src1),
8900 (i32 FROUND_CURRENT))>;
8901 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8902 (ins _.MemOp:$src1, i32u8imm:$src2),
8903 OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
8904 (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
8906 (i32 FROUND_CURRENT))>;
8907 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8908 (ins _.ScalarMemOp:$src1, i32u8imm:$src2),
8909 OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
8910 "${src1}"##_.BroadcastStr##", $src2",
8911 (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
8913 (i32 FROUND_CURRENT))>, EVEX_B;
8917 // Handles instructions of the form reg_vec1 = op(reg_vec,imm),{sae}
// This is the unary variant: the only vector input is $src1, followed by the
// immediate $src2. The single "rrib" form is printed with "{sae}", passes
// (i32 FROUND_NO_EXC) as the final OpNode operand and is tagged EVEX_B.
8918 multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
8919 SDNode OpNode, X86VectorVTInfo _>{
8920 let ExeDomain = _.ExeDomain in
8921 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8922 (ins _.RC:$src1, i32u8imm:$src2),
8923 OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
8924 "$src1, {sae}, $src2",
8925 (OpNode (_.VT _.RC:$src1),
8927 (i32 FROUND_NO_EXC))>, EVEX_B;
// Instantiates a unary packed FP-with-immediate operation at all vector
// widths. The 512-bit form (Z, under prd) combines the regular forms with the
// {sae} form; the 128-bit and 256-bit forms (under prd + HasVLX) get only the
// regular forms.
8930 multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
8931 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
8932 let Predicates = [prd] in {
8933 defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
8934 avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
8937 let Predicates = [prd, HasVLX] in {
8938 defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
8940 defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
8945 // Handles instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm)
8946 // op(reg_vec2,mem_vec,imm)
8947 // op(reg_vec2,broadcast(eltVt),imm)
8948 // All instructions are created with FROUND_CURRENT.
// Three maskable forms are defined; in each, $src1 is a vector register,
// $src3 is the immediate and (i32 FROUND_CURRENT) is the final OpNode
// operand. They differ in the second source:
//   rri  - vector register.
//   rmi  - full-vector memory operand (loaded with _.LdFrag).
//   rmbi - scalar memory operand broadcast with X86VBroadcast; tagged EVEX_B.
8949 multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
8951 let ExeDomain = _.ExeDomain in {
8952 defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
8953 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
8954 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8955 (OpNode (_.VT _.RC:$src1),
8958 (i32 FROUND_CURRENT))>;
8959 defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8960 (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
8961 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8962 (OpNode (_.VT _.RC:$src1),
8963 (_.VT (bitconvert (_.LdFrag addr:$src2))),
8965 (i32 FROUND_CURRENT))>;
8966 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
8967 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
8968 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
8969 "$src1, ${src2}"##_.BroadcastStr##", $src3",
8970 (OpNode (_.VT _.RC:$src1),
8971 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
8973 (i32 FROUND_CURRENT))>, EVEX_B;
8977 // Handles instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm)
8978 // op(reg_vec2,mem_vec,imm)
// The destination and the sources may have different vector types: DestInfo
// describes the result, SrcInfo both sources. The immediate is a u8imm.
//   rri - both sources are vector registers.
//   rmi - second source is a full-vector memory operand (SrcInfo.LdFrag).
8979 multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
8980 X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
8981 let ExeDomain = DestInfo.ExeDomain in {
8982 defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
8983 (ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
8984 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8985 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8986 (SrcInfo.VT SrcInfo.RC:$src2),
8988 defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
8989 (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
8990 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
8991 (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
8992 (SrcInfo.VT (bitconvert
8993 (SrcInfo.LdFrag addr:$src2))),
8998 // Handles instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm)
8999 // op(reg_vec2,mem_vec,imm)
9000 // op(reg_vec2,broadcast(eltVt),imm)
// Inherits the rri/rmi forms from avx512_3Op_rm_imm8 (using the same type
// info for destination and sources) and adds "rmbi", whose second source is a
// scalar memory operand broadcast with X86VBroadcast; that form is tagged
// EVEX_B and matches the immediate as (i8 imm).
9001 multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
9003 avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
9005 let ExeDomain = _.ExeDomain in
9006 defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9007 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9008 OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
9009 "$src1, ${src2}"##_.BroadcastStr##", $src3",
9010 (OpNode (_.VT _.RC:$src1),
9011 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
9012 (i8 imm:$src3))>, EVEX_B;
9015 // Handles scalar instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm)
9016 // op(reg_vec2,mem_scalar,imm)
9017 // All instructions are created with FROUND_CURRENT.
// Both forms are generated through AVX512_maskable_scalar:
//   rri - both sources are registers.
//   rmi - second source is a scalar memory operand, loaded with
//         _.ScalarLdFrag and wrapped in scalar_to_vector.
9018 multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9019 X86VectorVTInfo _> {
9020 let ExeDomain = _.ExeDomain in {
9021 defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9022 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9023 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9024 (OpNode (_.VT _.RC:$src1),
9027 (i32 FROUND_CURRENT))>;
9028 defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
9029 (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
9030 OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
9031 (OpNode (_.VT _.RC:$src1),
9032 (_.VT (scalar_to_vector
9033 (_.ScalarLdFrag addr:$src2))),
9035 (i32 FROUND_CURRENT))>;
9039 // Handles instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Defines the single register form "rrib": two vector register sources plus
// an immediate, printed with "{sae}", passing (i32 FROUND_NO_EXC) as the
// final OpNode operand and tagged EVEX_B.
9040 multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
9041 SDNode OpNode, X86VectorVTInfo _>{
9042 let ExeDomain = _.ExeDomain in
9043 defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9044 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9045 OpcodeStr, "$src3, {sae}, $src2, $src1",
9046 "$src1, $src2, {sae}, $src3",
9047 (OpNode (_.VT _.RC:$src1),
9050 (i32 FROUND_NO_EXC))>, EVEX_B;
9052 // Handles scalar instructions of the form reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
// Scalar counterpart of the packed {sae} form, generated through
// AVX512_maskable_scalar with (i32 FROUND_NO_EXC) and EVEX_B. Unlike the
// packed multiclasses, the record here is named explicitly as NAME#rrib.
9053 multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
9054 SDNode OpNode, X86VectorVTInfo _> {
9055 let ExeDomain = _.ExeDomain in
9056 defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
9057 (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
9058 OpcodeStr, "$src3, {sae}, $src2, $src1",
9059 "$src1, $src2, {sae}, $src3",
9060 (OpNode (_.VT _.RC:$src1),
9063 (i32 FROUND_NO_EXC))>, EVEX_B;
// Instantiates a two-source packed FP-with-immediate operation at all vector
// widths. The 512-bit form (Z, under prd) combines the regular forms with the
// {sae} form; the 128-bit and 256-bit forms (under prd + HasVLX) get only the
// regular forms.
9066 multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
9067 AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
9068 let Predicates = [prd] in {
9069 defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
9070 avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNode, _.info512>,
9074 let Predicates = [prd, HasVLX] in {
9075 defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info128>,
9077 defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, _.info256>,
// Instantiates avx512_3Op_rm_imm8 at all vector widths for an operation whose
// destination and source types differ. The 512-bit form requires HasBWI; the
// 128-bit and 256-bit forms require HasBWI and HasVLX. Every form is tagged
// AVX512AIi8Base and EVEX_4V.
9082 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
9083 AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
9084 let Predicates = [HasBWI] in {
9085 defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
9086 SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
9088 let Predicates = [HasBWI, HasVLX] in {
9089 defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
9090 SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
9091 defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
9092 SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
// Instantiates avx512_3Op_imm8 at all vector widths: the 512-bit form under
// HasAVX512, and the 128-bit and 256-bit forms under HasAVX512 + HasVLX.
9096 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
9097 bits<8> opc, SDNode OpNode>{
9098 let Predicates = [HasAVX512] in {
9099 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
9101 let Predicates = [HasAVX512, HasVLX] in {
9102 defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
9103 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
// Instantiates a scalar FP-with-immediate operation under predicate prd. The
// single Z128 instantiation combines the regular forms
// (avx512_fp_scalar_imm) with the {sae} form (avx512_fp_sae_scalar_imm).
9107 multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr,
9108 X86VectorVTInfo _, bits<8> opc, SDNode OpNode, Predicate prd>{
9109 let Predicates = [prd] in {
9110 defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, _>,
9111 avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNode, _>;
// Instantiates a unary packed FP-with-immediate operation for both element
// types: PS uses the f32 type info with opcode opcPs and EVEX_CD8<32, CD8VF>;
// PD uses the f64 type info with opcode opcPd, EVEX_CD8<64, CD8VF> and VEX_W.
9115 multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
9116 bits<8> opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{
9117 defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
9118 opcPs, OpNode, prd>, EVEX_CD8<32, CD8VF>;
9119 defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
9120 opcPd, OpNode, prd>, EVEX_CD8<64, CD8VF>, VEX_W;
// Unary packed operations with an immediate. The two opcode arguments are the
// PS and PD opcodes: VREDUCE and VGETMANT share one opcode for both (0x56 and
// 0x26), whereas VRNDSCALE uses 0x08 for PS and 0x09 for PD. VREDUCE requires
// HasDQI; the other two require HasAVX512.
9124 defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
9125 X86VReduce, HasDQI>, AVX512AIi8Base, EVEX;
9126 defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
9127 X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX;
9128 defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
9129 X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX;
// Two-source operations with an immediate.
// Packed range: opcode 0x50, HasDQI; the PD form carries VEX_W.
9132 defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
9133 0x50, X86VRange, HasDQI>,
9134 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9135 defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
9136 0x50, X86VRange, HasDQI>,
9137 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
// Scalar range: opcode 0x51, HasDQI. All scalar forms below are VEX_LIG and
// the SD forms carry VEX_W.
9139 defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", f64x_info,
9140 0x51, X86VRange, HasDQI>,
9141 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9142 defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
9143 0x51, X86VRange, HasDQI>,
9144 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Scalar reduce: opcode 0x57, HasDQI.
9146 defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
9147 0x57, X86Reduces, HasDQI>,
9148 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9149 defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
9150 0x57, X86Reduces, HasDQI>,
9151 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Scalar getmant: opcode 0x27, HasAVX512.
9153 defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
9154 0x27, X86GetMants, HasAVX512>,
9155 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
9156 defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9157 0x27, X86GetMants, HasAVX512>,
9158 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
// Instantiates avx512_3Op_imm8 for a shuffle operation; OpNode defaults to
// X86Shuf128. A 512-bit form (Z, HasAVX512) and a 256-bit form (Z256,
// HasAVX512 + HasVLX) are defined here.
// NOTE(review): no 128-bit form appears in the visible part of this
// multiclass; confirm against the full definition.
9160 multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
9161 bits<8> opc, SDNode OpNode = X86Shuf128>{
9162 let Predicates = [HasAVX512] in {
9163 defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
9166 let Predicates = [HasAVX512, HasVLX] in {
9167 defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
// Select the 512-bit register/immediate VRNDSCALE forms for the generic
// rounding nodes. The immediate chosen for each node is:
//   ffloor -> 0x9, fceil -> 0xA, ftrunc -> 0xB, fnearbyint -> 0xC, frint -> 0x4
// NOTE(review): presumably the low bits pick the rounding mode and bit 3
// suppresses the precision exception, per the VRNDSCALE imm8 layout in the
// Intel SDM -- confirm against the manual.
9170 let Predicates = [HasAVX512] in {
// v16f32 operands use VRNDSCALEPSZrri.
9171 def : Pat<(v16f32 (ffloor VR512:$src)),
9172 (VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9173 def : Pat<(v16f32 (fnearbyint VR512:$src)),
9174 (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9175 def : Pat<(v16f32 (fceil VR512:$src)),
9176 (VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9177 def : Pat<(v16f32 (frint VR512:$src)),
9178 (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9179 def : Pat<(v16f32 (ftrunc VR512:$src)),
9180 (VRNDSCALEPSZrri VR512:$src, (i32 0xB))>;
// v8f64 operands use VRNDSCALEPDZrri with the same immediates.
9182 def : Pat<(v8f64 (ffloor VR512:$src)),
9183 (VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
9184 def : Pat<(v8f64 (fnearbyint VR512:$src)),
9185 (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9186 def : Pat<(v8f64 (fceil VR512:$src)),
9187 (VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
9188 def : Pat<(v8f64 (frint VR512:$src)),
9189 (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9190 def : Pat<(v8f64 (ftrunc VR512:$src)),
9191 (VRNDSCALEPDZrri VR512:$src, (i32 0xB))>;
// Instantiate avx512_shuff_packed_128 for the four shuffle mnemonics. The FP
// forms (vshuff*) use opcode 0x23 and the integer forms (vshufi*) use 0x43;
// the 64-bit element forms additionally set VEX_W and use a 64-bit CD8
// element size. No OpNode is passed, so the multiclass default applies.
9194 defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
9195 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9196 defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
9197 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9198 defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
9199 AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
9200 defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
9201 AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
9203 let Predicates = [HasAVX512] in {
9204 // Provide fallback in case the load node that is used in the broadcast
9205 // patterns above is used by additional users, which prevents the pattern
9207 def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
9208 (VSHUFF64X2Zrri (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9209 (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9211 def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
9212 (VSHUFI64X2Zrri (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9213 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9216 def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
9217 (VSHUFF32X4Zrri (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9218 (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9220 def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
9221 (VSHUFI32X4Zrri (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9222 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9225 def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
9226 (VSHUFI32X4Zrri (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9227 (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9230 def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
9231 (VSHUFI32X4Zrri (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9232 (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
9236 multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
9237 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
9238 AVX512AIi8Base, EVEX_4V;
// VALIGN for 32-bit and 64-bit integer elements, both built from
// avx512_valign. The quadword form additionally sets VEX_W.
9241 defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
9242 EVEX_CD8<32, CD8VF>;
9243 defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
9244 EVEX_CD8<64, CD8VF>, VEX_W;
// Emits one selection pattern, named NAME#_.VTName#rri, that maps X86PAlignr
// on vectors of type _.VT to the register/register/immediate instruction
// NAME#_.ZSuffix#rri. Both register operands and the immediate are forwarded
// unchanged.
//   _ : vector type info the pattern is written for.
//   p : predicate list under which the pattern is enabled.
9246 multiclass avx512_vpalignr_lowering<X86VectorVTInfo _ , list<Predicate> p>{
9247 let Predicates = p in
9248 def NAME#_.VTName#rri:
9249 Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))),
9250 (!cast<Instruction>(NAME#_.ZSuffix#rri)
9251 _.RC:$src1, _.RC:$src2, imm:$imm)>;
// Applies avx512_vpalignr_lowering to all three vector widths of one
// AVX512VLVectorVTInfo: the 512-bit info under HasBWI alone, and the 128-bit
// and 256-bit infos under HasBWI plus HasVLX.
9254 multiclass avx512_vpalignr_lowering_common<AVX512VLVectorVTInfo _>:
9255 avx512_vpalignr_lowering<_.info512, [HasBWI]>,
9256 avx512_vpalignr_lowering<_.info128, [HasBWI, HasVLX]>,
9257 avx512_vpalignr_lowering<_.info256, [HasBWI, HasVLX]>;
9259 defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
9260 avx512vl_i8_info, avx512vl_i8_info>,
9261 avx512_vpalignr_lowering_common<avx512vl_i16_info>,
9262 avx512_vpalignr_lowering_common<avx512vl_i32_info>,
9263 avx512_vpalignr_lowering_common<avx512vl_f32_info>,
9264 avx512_vpalignr_lowering_common<avx512vl_i64_info>,
9265 avx512_vpalignr_lowering_common<avx512vl_f64_info>,
// VDBPSADBW: opcode 0x42, selected from X86dbpsadbw. Two different type infos
// are passed, avx512vl_i16_info first and avx512vl_i8_info second.
// NOTE(review): presumably the first is the result type and the second the
// source type; the multiclass is outside this chunk -- confirm.
9268 defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
9269 avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
9271 multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
9272 X86VectorVTInfo _> {
9273 let ExeDomain = _.ExeDomain in {
9274 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9275 (ins _.RC:$src1), OpcodeStr,
9277 (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase;
9279 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9280 (ins _.MemOp:$src1), OpcodeStr,
9282 (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>,
9283 EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>;
// Unary operation with an extra memory-broadcast form. Inherits everything
// avx512_unary_rm defines for the same arguments and adds "rmb", whose source
// is a scalar memory operand (_.ScalarMemOp) and whose pattern applies OpNode
// to an X86VBroadcast of the scalar load. The form is marked EVEX_B.
//   opc       : opcode byte.
//   OpcodeStr : mnemonic.
//   OpNode    : selection DAG node being matched.
//   _         : vector type info.
9287 multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
9288 X86VectorVTInfo _> :
9289 avx512_unary_rm<opc, OpcodeStr, OpNode, _> {
9290 defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9291 (ins _.ScalarMemOp:$src1), OpcodeStr,
// The same operand string (source followed by the broadcast suffix) is passed
// for both asm-string arguments.
9292 "${src1}"##_.BroadcastStr,
9293 "${src1}"##_.BroadcastStr,
9294 (_.VT (OpNode (X86VBroadcast
9295 (_.ScalarLdFrag addr:$src1))))>,
9296 EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>;
9299 multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9300 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9301 let Predicates = [prd] in
9302 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
9304 let Predicates = [prd, HasVLX] in {
9305 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
9307 defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info128>,
9312 multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
9313 AVX512VLVectorVTInfo VTInfo, Predicate prd> {
9314 let Predicates = [prd] in
9315 defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
9318 let Predicates = [prd, HasVLX] in {
9319 defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
9321 defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
9326 multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
9327 SDNode OpNode, Predicate prd> {
9328 defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, avx512vl_i64_info,
9330 defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, avx512vl_i32_info,
9334 multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
9335 SDNode OpNode, Predicate prd> {
9336 defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, avx512vl_i16_info, prd>;
9337 defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, avx512vl_i8_info, prd>;
9340 multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
9341 bits<8> opc_d, bits<8> opc_q,
9342 string OpcodeStr, SDNode OpNode> {
9343 defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode,
9345 avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode,
9349 defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs>;
9351 // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX.
9352 let Predicates = [HasAVX512, NoVLX] in {
9353 def : Pat<(v4i64 (abs VR256X:$src)),
9356 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9358 def : Pat<(v2i64 (abs VR128X:$src)),
9361 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9365 multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
9367 defm NAME : avx512_unary_rm_vl_dq<opc, opc, OpcodeStr, ctlz, prd>;
9370 defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>;
9371 defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>;
9373 // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX.
9374 let Predicates = [HasCDI, NoVLX] in {
9375 def : Pat<(v4i64 (ctlz VR256X:$src)),
9378 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9380 def : Pat<(v2i64 (ctlz VR128X:$src)),
9383 (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9386 def : Pat<(v8i32 (ctlz VR256X:$src)),
9389 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)),
9391 def : Pat<(v4i32 (ctlz VR128X:$src)),
9394 (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)),
9398 //===---------------------------------------------------------------------===//
9399 // Counts number of ones - VPOPCNTD and VPOPCNTQ
9400 //===---------------------------------------------------------------------===//
9402 multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
9403 let Predicates = [HasVPOPCNTDQ] in
9404 defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
9407 // Use 512bit version to implement 128/256 bit.
9408 multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
9409 let Predicates = [prd] in {
9410 def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
9412 (!cast<Instruction>(NAME # "Zrr")
9413 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9415 _.info256.SubRegIdx)),
9416 _.info256.SubRegIdx)>;
9418 def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
9420 (!cast<Instruction>(NAME # "Zrr")
9421 (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
9423 _.info128.SubRegIdx)),
9424 _.info128.SubRegIdx)>;
9428 defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
9429 avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
9430 defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
9431 avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
9433 //===---------------------------------------------------------------------===//
9434 // Replicate Single FP - MOVSHDUP and MOVSLDUP
9435 //===---------------------------------------------------------------------===//
9436 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{
9437 defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, avx512vl_f32_info,
9441 defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>;
9442 defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>;
9444 //===----------------------------------------------------------------------===//
9445 // AVX-512 - MOVDDUP
9446 //===----------------------------------------------------------------------===//
// 128-bit MOVDDUP forms. Unlike the generic unary multiclass used for the
// wider vectors, the memory form here takes a scalar memory operand
// (_.ScalarMemOp) and matches OpNode applied to scalar_to_vector of a scalar
// load, with EVEX_CD8<_.EltSize, CD8VH>.
//   opc       : opcode byte.
//   OpcodeStr : mnemonic.
//   OpNode    : selection DAG node being matched.
//   _         : vector type info.
9448 multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode,
9449 X86VectorVTInfo _> {
9450 let ExeDomain = _.ExeDomain in {
// Register form: OpNode applied directly to the source register.
9451 defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
9452 (ins _.RC:$src), OpcodeStr, "$src", "$src",
9453 (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX;
// Memory form: only a scalar element is loaded.
9454 defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
9455 (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
9456 (_.VT (OpNode (_.VT (scalar_to_vector
9457 (_.ScalarLdFrag addr:$src)))))>,
9458 EVEX, EVEX_CD8<_.EltSize, CD8VH>;
9462 multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
9463 AVX512VLVectorVTInfo VTInfo> {
9465 defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512;
9467 let Predicates = [HasAVX512, HasVLX] in {
9468 defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, VTInfo.info256>,
9470 defm Z128 : avx512_movddup_128<opc, OpcodeStr, OpNode, VTInfo.info128>,
9475 multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
9476 defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode,
9477 avx512vl_f64_info>, XD, VEX_W;
9480 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
// Extra selection patterns for the 128-bit VMOVDDUP instructions, enabled
// under HasVLX. They cover X86Movddup of a full v2f64 load and X86VBroadcast
// of an f64 (register or load), each in unmasked, merge-masked (k) and
// zero-masked (kz) variants.
9482 let Predicates = [HasVLX] in {
// Unmasked forms.
9483 def : Pat<(X86Movddup (loadv2f64 addr:$src)),
9484 (VMOVDDUPZ128rm addr:$src)>;
9485 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
9486 (VMOVDDUPZ128rm addr:$src)>;
// A scalar f64 register is first copied into VR128X.
9487 def : Pat<(v2f64 (X86VBroadcast f64:$src)),
9488 (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// vselect over X86Movddup of a v2f64 load: a register false-operand selects
// the merge-masked form, an all-zeros false-operand the zero-masked form.
9490 def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
9491 (v2f64 VR128X:$src0)),
9492 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9493 def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)),
9494 (bitconvert (v4i32 immAllZerosV))),
9495 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
// vselect over a broadcast of a scalar f64 register.
9497 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9498 (v2f64 VR128X:$src0)),
9499 (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
9500 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
9501 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
9502 (bitconvert (v4i32 immAllZerosV))),
9503 (VMOVDDUPZ128rrkz VK2WM:$mask, (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
// vselect over a broadcast of a loaded f64.
9505 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9506 (v2f64 VR128X:$src0)),
9507 (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
9508 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
9509 (bitconvert (v4i32 immAllZerosV))),
9510 (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
9513 //===----------------------------------------------------------------------===//
9514 // AVX-512 - Unpack Instructions
9515 //===----------------------------------------------------------------------===//
9516 defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
9518 defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
// Integer unpack instructions. The "L" mnemonics are selected from X86Unpckl
// and the "H" mnemonics from X86Unpckh; all pass SSE_INTALU_ITINS_P.
// Byte and word element forms require HasBWI.
9521 defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
9522 SSE_INTALU_ITINS_P, HasBWI>;
9523 defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh,
9524 SSE_INTALU_ITINS_P, HasBWI>;
9525 defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl,
9526 SSE_INTALU_ITINS_P, HasBWI>;
9527 defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh,
9528 SSE_INTALU_ITINS_P, HasBWI>;
// Dword and qword element forms require only HasAVX512.
9530 defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl,
9531 SSE_INTALU_ITINS_P, HasAVX512>;
9532 defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh,
9533 SSE_INTALU_ITINS_P, HasAVX512>;
9534 defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl,
9535 SSE_INTALU_ITINS_P, HasAVX512>;
9536 defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh,
9537 SSE_INTALU_ITINS_P, HasAVX512>;
9539 //===----------------------------------------------------------------------===//
9540 // AVX-512 - Extract & Insert Integer Instructions
9541 //===----------------------------------------------------------------------===//
9543 multiclass avx512_extract_elt_bw_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9544 X86VectorVTInfo _> {
9545 def mr : AVX512Ii8<opc, MRMDestMem, (outs),
9546 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9547 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9548 [(store (_.EltVT (trunc (assertzext (OpNode (_.VT _.RC:$src1),
9551 EVEX, EVEX_CD8<_.EltSize, CD8VT1>;
9554 multiclass avx512_extract_elt_b<string OpcodeStr, X86VectorVTInfo _> {
9555 let Predicates = [HasBWI] in {
9556 def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst),
9557 (ins _.RC:$src1, u8imm:$src2),
9558 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9559 [(set GR32orGR64:$dst,
9560 (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>,
9563 defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD;
9567 multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
9568 let Predicates = [HasBWI] in {
9569 def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst),
9570 (ins _.RC:$src1, u8imm:$src2),
9571 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9572 [(set GR32orGR64:$dst,
9573 (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>,
9576 let hasSideEffects = 0 in
9577 def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
9578 (ins _.RC:$src1, u8imm:$src2),
9579 OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
9580 EVEX, TAPD, FoldGenData<NAME#rr>;
9582 defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
9586 multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
9587 RegisterClass GRC> {
9588 let Predicates = [HasDQI] in {
9589 def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst),
9590 (ins _.RC:$src1, u8imm:$src2),
9591 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9593 (extractelt (_.VT _.RC:$src1), imm:$src2))]>,
9596 def mr : AVX512Ii8<0x16, MRMDestMem, (outs),
9597 (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2),
9598 OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
9599 [(store (extractelt (_.VT _.RC:$src1),
9600 imm:$src2),addr:$dst)]>,
9601 EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD;
// Element-extract instructions on 128-bit vectors. Byte and word forms use
// their own multiclasses; dword and qword share avx512_extract_elt_dq and
// differ in the vector info, the GPR class (GR32 vs GR64) and VEX_W.
9605 defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>;
9606 defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>;
9607 defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
9608 defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
9610 multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
9611 X86VectorVTInfo _, PatFrag LdFrag> {
9612 def rm : AVX512Ii8<opc, MRMSrcMem, (outs _.RC:$dst),
9613 (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
9614 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9616 (_.VT (OpNode _.RC:$src1, (LdFrag addr:$src2), imm:$src3)))]>,
9617 EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>;
9620 multiclass avx512_insert_elt_bw<bits<8> opc, string OpcodeStr, SDNode OpNode,
9621 X86VectorVTInfo _, PatFrag LdFrag> {
9622 let Predicates = [HasBWI] in {
9623 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9624 (ins _.RC:$src1, GR32orGR64:$src2, u8imm:$src3),
9625 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9627 (OpNode _.RC:$src1, GR32orGR64:$src2, imm:$src3))]>, EVEX_4V;
9629 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, OpNode, _, LdFrag>;
9633 multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
9634 X86VectorVTInfo _, RegisterClass GRC> {
9635 let Predicates = [HasDQI] in {
9636 def rr : AVX512Ii8<opc, MRMSrcReg, (outs _.RC:$dst),
9637 (ins _.RC:$src1, GRC:$src2, u8imm:$src3),
9638 OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
9640 (_.VT (insertelt _.RC:$src1, GRC:$src2, imm:$src3)))]>,
9643 defm NAME : avx512_insert_elt_m<opc, OpcodeStr, insertelt, _,
9644 _.ScalarLdFrag>, TAPD;
9648 defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
9650 defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
9652 defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
9653 defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
9654 //===----------------------------------------------------------------------===//
9655 // VSHUFPS - VSHUFPD Operations
9656 //===----------------------------------------------------------------------===//
9657 multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
9658 AVX512VLVectorVTInfo VTInfo_FP>{
9659 defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
9660 EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
9661 AVX512AIi8Base, EVEX_4V;
// VSHUFPS/VSHUFPD: each passes an integer and an FP type info of matching
// element width to avx512_shufp. The single-precision form is tagged PS; the
// double-precision form is tagged PD and VEX_W.
9664 defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
9665 defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
9666 //===----------------------------------------------------------------------===//
9667 // AVX-512 - Byte shift Left/Right
9668 //===----------------------------------------------------------------------===//
// Shift-by-immediate instruction pair for one vector width.
//   opc       : opcode byte shared by both forms.
//   OpNode    : selection DAG node; it receives the source vector and an i8
//               immediate.
//   MRMr/MRMm : encoding formats for the register and memory forms.
//   OpcodeStr : mnemonic.
//   _         : vector type info.
9670 multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
9671 Format MRMm, string OpcodeStr, X86VectorVTInfo _>{
// Register source.
9672 def rr : AVX512<opc, MRMr,
9673 (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
9674 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9675 [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>;
// Memory source: the loaded value is bitconverted to _.VT before OpNode.
9676 def rm : AVX512<opc, MRMm,
9677 (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
9678 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9679 [(set _.RC:$dst,(_.VT (OpNode
9680 (_.VT (bitconvert (_.LdFrag addr:$src1))),
9681 (i8 imm:$src2))))]>;
// Instantiates avx512_shift_packed for all three vector widths using byte
// element infos: Z512 (v64i8_info) under [prd], and Z256 (v32i8x_info) and
// Z128 (v16i8x_info) under [prd, HasVLX].
//   prd : base feature predicate supplied by the caller.
9684 multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
9685 Format MRMm, string OpcodeStr, Predicate prd>{
9686 let Predicates = [prd] in
9687 defm Z512 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9688 OpcodeStr, v64i8_info>, EVEX_V512;
9689 let Predicates = [prd, HasVLX] in {
9690 defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9691 OpcodeStr, v32i8x_info>, EVEX_V256;
9692 defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm,
9693 OpcodeStr, v16i8x_info>, EVEX_V128;
// Byte-shift instructions. Both share opcode 0x73 and the HasBWI predicate;
// they differ in the selection node and in the encoding formats passed
// (MRM7r/MRM7m for the left shift, MRM3r/MRM3m for the right shift).
9696 defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
9697 HasBWI>, AVX512PDIi8Base, EVEX_4V;
9698 defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
9699 HasBWI>, AVX512PDIi8Base, EVEX_4V;
// Two-source instruction pair whose result type differs from its source type.
//   opc       : opcode byte.
//   OpNode    : selection DAG node taking two _src.VT operands.
//   OpcodeStr : mnemonic.
//   _dst      : vector type info of the result (register class and VT).
//   _src      : vector type info of both sources.
9702 multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
9703 string OpcodeStr, X86VectorVTInfo _dst,
9704 X86VectorVTInfo _src>{
// Both sources in registers.
9705 def rr : AVX512BI<opc, MRMSrcReg,
9706 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2),
9707 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9708 [(set _dst.RC:$dst,(_dst.VT
9709 (OpNode (_src.VT _src.RC:$src1),
9710 (_src.VT _src.RC:$src2))))]>;
// Second source loaded from memory and bitconverted to _src.VT.
9711 def rm : AVX512BI<opc, MRMSrcMem,
9712 (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2),
9713 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
9714 [(set _dst.RC:$dst,(_dst.VT
9715 (OpNode (_src.VT _src.RC:$src1),
9716 (_src.VT (bitconvert
9717 (_src.LdFrag addr:$src2))))))]>;
// Instantiates avx512_psadbw_packed for all three vector widths, pairing an
// i64 result info with the i8 source info of the same total width:
// v8i64/v64i8 under [prd], v4i64x/v32i8x and v2i64x/v16i8x under
// [prd, HasVLX].
9720 multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
9721 string OpcodeStr, Predicate prd> {
9722 let Predicates = [prd] in
9723 defm Z512 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v8i64_info,
9724 v64i8_info>, EVEX_V512;
9725 let Predicates = [prd, HasVLX] in {
9726 defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v4i64x_info,
9727 v32i8x_info>, EVEX_V256;
9728 defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, v2i64x_info,
9729 v16i8x_info>, EVEX_V128;
9733 defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
9736 // Transforms to swizzle an immediate to enable better matching when
9737 // memory operand isn't in the right place.
// Immediate transform returning, as an i8 immediate, the VPTERNLOG truth
// table that results from exchanging the first and last operands.
9738 def VPTERNLOG321_imm8 : SDNodeXForm<imm, [{
9739 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 2.
9740 uint8_t Imm = N->getZExtValue();
9741 // Swap bits 1/4 and 3/6.
// Mask 0xa5 keeps bits 0, 2, 5 and 7, which do not move.
9742 uint8_t NewImm = Imm & 0xa5;
9743 if (Imm & 0x02) NewImm |= 0x10;
9744 if (Imm & 0x10) NewImm |= 0x02;
9745 if (Imm & 0x08) NewImm |= 0x40;
9746 if (Imm & 0x40) NewImm |= 0x08;
9747 return getI8Imm(NewImm, SDLoc(N));
// Immediate transform returning, as an i8 immediate, the VPTERNLOG truth
// table that results from exchanging the first two operands.
9749 def VPTERNLOG213_imm8 : SDNodeXForm<imm, [{
9750 // Convert a VPTERNLOG immediate by swapping operand 0 and operand 1.
9751 uint8_t Imm = N->getZExtValue();
9752 // Swap bits 2/4 and 3/5.
// Mask 0xc3 keeps bits 0, 1, 6 and 7, which do not move.
9753 uint8_t NewImm = Imm & 0xc3;
9754 if (Imm & 0x04) NewImm |= 0x10;
9755 if (Imm & 0x10) NewImm |= 0x04;
9756 if (Imm & 0x08) NewImm |= 0x20;
9757 if (Imm & 0x20) NewImm |= 0x08;
9758 return getI8Imm(NewImm, SDLoc(N));
// Immediate transform returning, as an i8 immediate, the VPTERNLOG truth
// table that results from exchanging the last two operands.
9760 def VPTERNLOG132_imm8 : SDNodeXForm<imm, [{
9761 // Convert a VPTERNLOG immediate by swapping operand 1 and operand 2.
9762 uint8_t Imm = N->getZExtValue();
9763 // Swap bits 1/2 and 5/6.
// Mask 0x99 keeps bits 0, 3, 4 and 7, which do not move.
9764 uint8_t NewImm = Imm & 0x99;
9765 if (Imm & 0x02) NewImm |= 0x04;
9766 if (Imm & 0x04) NewImm |= 0x02;
9767 if (Imm & 0x20) NewImm |= 0x40;
9768 if (Imm & 0x40) NewImm |= 0x20;
9769 return getI8Imm(NewImm, SDLoc(N));
// Immediate transform returning, as an i8 immediate, a VPTERNLOG truth table
// whose six middle bits are rotated through the two 3-cycles listed below.
9771 def VPTERNLOG231_imm8 : SDNodeXForm<imm, [{
9772 // Convert a VPTERNLOG immediate by moving operand 1 to the end.
9773 uint8_t Imm = N->getZExtValue();
9774 // Move bits 1->2, 2->4, 3->6, 4->1, 5->3, 6->5
// Mask 0x81 keeps bits 0 and 7, the only ones that do not move.
9775 uint8_t NewImm = Imm & 0x81;
9776 if (Imm & 0x02) NewImm |= 0x04;
9777 if (Imm & 0x04) NewImm |= 0x10;
9778 if (Imm & 0x08) NewImm |= 0x40;
9779 if (Imm & 0x10) NewImm |= 0x02;
9780 if (Imm & 0x20) NewImm |= 0x08;
9781 if (Imm & 0x40) NewImm |= 0x20;
9782 return getI8Imm(NewImm, SDLoc(N));
// Immediate transform returning, as an i8 immediate, a VPTERNLOG truth table
// whose six middle bits are rotated through the two 3-cycles listed below
// (the reverse of the rotation done by VPTERNLOG231_imm8).
9784 def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{
9785 // Convert a VPTERNLOG immediate by moving operand 2 to the beginning.
9786 uint8_t Imm = N->getZExtValue();
9787 // Move bits 1->4, 2->1, 3->5, 4->2, 5->6, 6->3
// Mask 0x81 keeps bits 0 and 7, the only ones that do not move.
9788 uint8_t NewImm = Imm & 0x81;
9789 if (Imm & 0x02) NewImm |= 0x10;
9790 if (Imm & 0x04) NewImm |= 0x02;
9791 if (Imm & 0x08) NewImm |= 0x20;
9792 if (Imm & 0x10) NewImm |= 0x04;
9793 if (Imm & 0x20) NewImm |= 0x40;
9794 if (Imm & 0x40) NewImm |= 0x08;
9795 return getI8Imm(NewImm, SDLoc(N));
9798 multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
9800 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9801 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9802 (ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
9803 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9804 (OpNode (_.VT _.RC:$src1),
9807 (i8 imm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V;
9808 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9809 (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
9810 OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
9811 (OpNode (_.VT _.RC:$src1),
9813 (_.VT (bitconvert (_.LdFrag addr:$src3))),
9814 (i8 imm:$src4)), 1, 0>,
9815 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
9816 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9817 (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
9818 OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
9819 "$src2, ${src3}"##_.BroadcastStr##", $src4",
9820 (OpNode (_.VT _.RC:$src1),
9822 (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
9823 (i8 imm:$src4)), 1, 0>, EVEX_B,
9824 AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
9825 }// Constraints = "$src1 = $dst"
9827 // Additional patterns for matching passthru operand in other positions.
9828 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9829 (OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9831 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9832 _.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9833 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9834 (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
9836 (!cast<Instruction>(NAME#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
9837 _.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9839 // Additional patterns for matching loads in other positions.
9840 def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
9841 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
9842 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9843 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9844 def : Pat<(_.VT (OpNode _.RC:$src1,
9845 (bitconvert (_.LdFrag addr:$src3)),
9846 _.RC:$src2, (i8 imm:$src4))),
9847 (!cast<Instruction>(NAME#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
9848 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9850 // Additional patterns for matching zero masking with loads in other
9852 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9853 (OpNode (bitconvert (_.LdFrag addr:$src3)),
9854 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9856 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9857 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9858 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9859 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9860 _.RC:$src2, (i8 imm:$src4)),
9862 (!cast<Instruction>(NAME#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
9863 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9865 // Additional patterns for matching masked loads with different
9867 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9868 (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
9869 _.RC:$src2, (i8 imm:$src4)),
9871 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9872 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9873 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9874 (OpNode (bitconvert (_.LdFrag addr:$src3)),
9875 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9877 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9878 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9879 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9880 (OpNode _.RC:$src2, _.RC:$src1,
9881 (bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
9883 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9884 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9885 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9886 (OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
9887 _.RC:$src1, (i8 imm:$src4)),
9889 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9890 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
9891 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9892 (OpNode (bitconvert (_.LdFrag addr:$src3)),
9893 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
9895 (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
9896 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
9898 // Additional patterns for matching broadcasts in other positions.
9899 def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9900 _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
9901 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
9902 addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9903 def : Pat<(_.VT (OpNode _.RC:$src1,
9904 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9905 _.RC:$src2, (i8 imm:$src4))),
9906 (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
9907 addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
9909 // Additional patterns for matching zero masking with broadcasts in other
9911 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9912 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9913 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9915 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
9916 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
9917 (VPTERNLOG321_imm8 imm:$src4))>;
9918 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9920 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9921 _.RC:$src2, (i8 imm:$src4)),
9923 (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
9924 _.KRCWM:$mask, _.RC:$src2, addr:$src3,
9925 (VPTERNLOG132_imm8 imm:$src4))>;
9927 // Additional patterns for matching masked broadcasts with different
9929 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9931 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9932 _.RC:$src2, (i8 imm:$src4)),
9934 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
9935 _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
// The remaining merge-masked broadcast patterns. Each matches an
// X86VBroadcast of a scalar load in a different operand position and
// rewrites the immediate so the operands can be passed in instruction order.
// They must select the broadcast-memory instruction (rmbik): addr:$src3 is
// the address of a single scalar element, so the full-vector-load form (rmik)
// would read a whole vector from that address instead of broadcasting it.
9936 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9937 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9938 _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
9940 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
9941 _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
9942 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9943 (OpNode _.RC:$src2, _.RC:$src1,
9944 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9945 (i8 imm:$src4)), _.RC:$src1)),
9946 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
9947 _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
9948 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9950 (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9951 _.RC:$src1, (i8 imm:$src4)),
9953 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
9954 _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
9955 def : Pat<(_.VT (vselect _.KRCWM:$mask,
9956 (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
9957 _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
9959 (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
9960 _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Instantiates avx512_ternlog (opcode 0x25, node X86vpternlog) for every
// vector width described by the AVX512VLVectorVTInfo argument:
//   Z     - 512-bit info, requires HasAVX512.
//   Z128  - 128-bit info, requires HasAVX512 and HasVLX.
//   Z256  - 256-bit info, requires HasAVX512 and HasVLX.
// OpcodeStr is the assembly mnemonic forwarded to each instantiation.
9963 multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
9964 let Predicates = [HasAVX512] in
9965 defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
9966 let Predicates = [HasAVX512, HasVLX] in {
9967 defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
9968 defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
// Ternary-logic instructions for 32-bit (D) and 64-bit (Q) integer elements.
// The Q variant additionally carries VEX_W.
9972 defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
9973 defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
9975 //===----------------------------------------------------------------------===//
9976 // AVX-512 - FixupImm
9977 //===----------------------------------------------------------------------===//
// Packed fixupimm instruction forms, all built on AVX512_maskable_3src with
// $src1 tied to $dst and an i32u8imm immediate in $src4:
//   rri  - $src3 is a register (MRMSrcReg).
//   rmi  - $src3 is a full vector load (_.MemOp / _.LdFrag, MRMSrcMem).
//   rmbi - $src3 is a broadcast scalar load (_.ScalarMemOp / X86VBroadcast);
//          marked EVEX_B and printed with _.BroadcastStr.
// Every form passes FROUND_CURRENT as the last OpNode operand.
9979 multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
9981 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
9982 defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
9983 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
9984 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
9985 (OpNode (_.VT _.RC:$src1),
9987 (_.IntVT _.RC:$src3),
9989 (i32 FROUND_CURRENT))>;
9990 defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9991 (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
9992 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
9993 (OpNode (_.VT _.RC:$src1),
9995 (_.IntVT (bitconvert (_.LdFrag addr:$src3))),
9997 (i32 FROUND_CURRENT))>;
9998 defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
9999 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10000 OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2",
10001 "$src2, ${src3}"##_.BroadcastStr##", $src4",
10002 (OpNode (_.VT _.RC:$src1),
10004 (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
10006 (i32 FROUND_CURRENT))>, EVEX_B;
10007 } // Constraints = "$src1 = $dst"
// Register-register packed fixupimm form with {sae} in the assembly string.
// Same operand layout as the rri form, but the last OpNode operand is
// FROUND_NO_EXC instead of FROUND_CURRENT, and the instruction is marked
// EVEX_B.
10010 multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
10011 SDNode OpNode, X86VectorVTInfo _>{
10012 let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
10013 defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
10014 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10015 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10016 "$src2, $src3, {sae}, $src4",
10017 (OpNode (_.VT _.RC:$src1),
10019 (_.IntVT _.RC:$src3),
10021 (i32 FROUND_NO_EXC))>, EVEX_B;
// Scalar fixupimm instruction forms, built on AVX512_maskable_3src_scalar with
// $src1 tied to $dst and predicated on HasAVX512. The third source is typed
// with the separate _src3VT info rather than with `_`.
//   rri  - register $src3, FROUND_CURRENT.
//   rrib - register $src3 with {sae} in the assembly string, FROUND_NO_EXC,
//          EVEX_B.
//   rmi  - $src3 is a scalar load (_.ScalarMemOp) wrapped in scalar_to_vector,
//          FROUND_CURRENT.
10025 multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
10026 X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
10027 let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
10028 ExeDomain = _.ExeDomain in {
10029 defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10030 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10031 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10032 (OpNode (_.VT _.RC:$src1),
10034 (_src3VT.VT _src3VT.RC:$src3),
10036 (i32 FROUND_CURRENT))>;
10038 defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
10039 (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
10040 OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
10041 "$src2, $src3, {sae}, $src4",
10042 (OpNode (_.VT _.RC:$src1),
10044 (_src3VT.VT _src3VT.RC:$src3),
10046 (i32 FROUND_NO_EXC))>, EVEX_B;
10047 defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
10048 (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
10049 OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
10050 (OpNode (_.VT _.RC:$src1),
10052 (_src3VT.VT (scalar_to_vector
10053 (_src3VT.ScalarLdFrag addr:$src3))),
10055 (i32 FROUND_CURRENT))>;
// Instantiates the packed fixupimm forms (opcode 0x54, node X86VFixupimm,
// mnemonic "vfixupimm") for every width in _Vec:
//   Z     - 512-bit; includes both the regular forms and the {sae} form;
//           requires HasAVX512.
//   Z128  - 128-bit; regular forms only; requires HasAVX512 and HasVLX.
//   Z256  - 256-bit; regular forms only; requires HasAVX512 and HasVLX.
10059 multiclass avx512_fixupimm_packed_all<AVX512VLVectorVTInfo _Vec>{
10060 let Predicates = [HasAVX512] in
10061 defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
10062 avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, _Vec.info512>,
10063 AVX512AIi8Base, EVEX_4V, EVEX_V512;
10064 let Predicates = [HasAVX512, HasVLX] in {
10065 defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info128>,
10066 AVX512AIi8Base, EVEX_4V, EVEX_V128;
10067 defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, _Vec.info256>,
10068 AVX512AIi8Base, EVEX_4V, EVEX_V256;
// Concrete fixupimm instructions.
// Scalar SS/SD use opcode 0x55 with X86VFixupimmScalar; the third source uses
// the matching integer vector info (v4i32x_info / v2i64x_info).
// Packed PS/PD come from avx512_fixupimm_packed_all. The 64-bit element
// variants (SD, PD) additionally carry VEX_W.
10072 defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10073 f32x_info, v4i32x_info>,
10074 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
10075 defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar,
10076 f64x_info, v2i64x_info>,
10077 AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
10078 defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
10079 EVEX_CD8<32, CD8VF>;
10080 defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
10081 EVEX_CD8<64, CD8VF>, VEX_W;
10085 // Patterns used to select SSE scalar fp arithmetic instructions from either:
10088 // (1) a scalar fp operation followed by a blend
10090 // The effect is that the backend no longer emits unnecessary vector
10091 // insert instructions immediately after SSE scalar fp instructions
10092 // like addss or mulss.
10094 // For example, given the following code:
10095 // __m128 foo(__m128 A, __m128 B) {
10100 // Previously we generated:
10101 // addss %xmm0, %xmm1
10102 // movss %xmm1, %xmm0
10104 // We now generate:
10105 // addss %xmm1, %xmm0
10107 // (2) a vector packed single/double fp operation followed by a vector insert
10109 // The effect is that the backend converts the packed fp instruction
10110 // followed by a vector insert into a single SSE scalar fp instruction.
10112 // For example, given the following code:
10113 // __m128 foo(__m128 A, __m128 B) {
10114 // __m128 C = A + B;
10115 // return (__m128) {C[0], A[1], A[2], A[3]};
10118 // Previously we generated:
10119 // addps %xmm0, %xmm1
10120 // movss %xmm1, %xmm0
10122 // We now generate:
10123 // addss %xmm1, %xmm0
10125 // TODO: Some canonicalization in lowering would simplify the number of
10126 // patterns we have to try to match.
// Selection patterns (HasAVX512) that fold a scalar f32 math op `Op` plus the
// re-insertion of its result into element 0 of a v4f32 into a single
// "V"#OpcPrefix#SSZrr_Int instruction (or the masked ...SSZrr_Intk form for
// the X86selects case). The insertion may be expressed with X86Movss or with
// X86Blendi using an immediate of 1. FR32X operands are moved into VR128X with
// COPY_TO_REGCLASS before being passed to the instruction.
10127 multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
10128 let Predicates = [HasAVX512] in {
10129 // extracted scalar math op with insert via movss
10130 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10131 (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10133 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10134 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10136 // extracted scalar math op with insert via blend
10137 def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
10138 (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
10139 FR32X:$src))), (i8 1))),
10140 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
10141 (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
10143 // vector math op with insert via movss
10144 def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
10145 (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
10146 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10148 // vector math op with insert via blend
10149 def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
10150 (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
10151 (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
10153 // extracted masked scalar math op with insert via movss
10154 def : Pat<(X86Movss (v4f32 VR128X:$src1),
10156 (X86selects VK1WM:$mask,
10157 (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
10160 (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
10161 VK1WM:$mask, v4f32:$src1,
10162 (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
// Instantiate the f32 scalar-math folding patterns for add, sub, mul and div.
10166 defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
10167 defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
10168 defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
10169 defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
// Selection patterns (HasAVX512) that fold a scalar f64 math op `Op` plus the
// re-insertion of its result into element 0 of a v2f64 into a single
// "V"#OpcPrefix#SDZrr_Int instruction (or the masked ...SDZrr_Intk form for
// the X86selects case). The insertion may be expressed with X86Movsd or with
// X86Blendi using an immediate of 1. FR64X operands are moved into VR128X with
// COPY_TO_REGCLASS before being passed to the instruction.
10171 multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
10172 let Predicates = [HasAVX512] in {
10173 // extracted scalar math op with insert via movsd
10174 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10175 (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10177 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10178 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10180 // extracted scalar math op with insert via blend
10181 def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
10182 (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
10183 FR64X:$src))), (i8 1))),
10184 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
10185 (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
10187 // vector math op with insert via movsd
10188 def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
10189 (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
10190 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10192 // vector math op with insert via blend
10193 def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
10194 (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
10195 (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
10197 // extracted masked scalar math op with insert via movsd
10198 def : Pat<(X86Movsd (v2f64 VR128X:$src1),
10200 (X86selects VK1WM:$mask,
10201 (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
10204 (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
10205 VK1WM:$mask, v2f64:$src1,
10206 (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
// Instantiate the f64 scalar-math folding patterns for add, sub, mul and div.
10210 defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
10211 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
10212 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
10213 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;